Skip to content

Commit 510b9a6

Browse files
committed
feat(importer): Add rabin chunker to available importer chunker algorithms
This is required to have feature parity with go-ipfs which supports rabin chunking algorithm. Rabin chunker supports the following `chunkerOptions`: `minChunkSize`, `avgChunkSize`, `window`, and `polynomial`. Rabin chunker uses the same defaults specified by the go-ipfs-chunker. Supports ipfs/js-ipfs#1283 License: MIT Signed-off-by: Dan Ordille <[email protected]>
1 parent dfc9f20 commit 510b9a6

File tree

6 files changed

+106
-7
lines changed

6 files changed

+106
-7
lines changed

package.json

+5-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
"leadMaintainer": "Alex Potsides <[email protected]>",
66
"main": "src/index.js",
77
"browser": {
8-
"fs": false
8+
"fs": false,
9+
"rabin": false
910
},
1011
"scripts": {
1112
"test": "aegir test",
@@ -72,7 +73,9 @@
7273
"pull-through": "^1.0.18",
7374
"pull-traverse": "^1.0.3",
7475
"pull-write": "^1.1.4",
75-
"sparse-array": "^1.3.1"
76+
"rabin": "^1.6.0",
77+
"sparse-array": "^1.3.1",
78+
"stream-to-pull-stream": "^1.7.2"
7679
},
7780
"contributors": [
7881
"Alan Shaw <[email protected]>",

src/builder/builder.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode
1616

1717
const defaultOptions = {
1818
chunkerOptions: {
19-
maxChunkSize: 262144
19+
maxChunkSize: 262144,
20+
avgChunkSize: 262144
2021
},
2122
rawLeaves: false,
2223
hashAlg: 'sha2-256',

src/chunker/index.js

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
'use strict'
2+
3+
const chunkers = {
4+
fixed: require('../chunker/fixed-size'),
5+
rabin: require('../chunker/rabin')
6+
}
7+
8+
module.exports = chunkers

src/chunker/rabin.js

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
'use strict'
2+
3+
const createRabin = require('rabin')
4+
const toPull = require('stream-to-pull-stream')
5+
6+
module.exports = (options) => {
7+
let min, max, avg
8+
if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) {
9+
avg = options.avgChunkSize
10+
min = options.minChunkSize
11+
max = options.maxChunkSize
12+
} else {
13+
avg = options.avgChunkSize
14+
min = avg / 3
15+
max = avg + (avg / 2)
16+
}
17+
18+
const sizepow = Math.floor(Math.log2(avg))
19+
const rabin = createRabin({
20+
min: min,
21+
max: max,
22+
bits: sizepow,
23+
window: options.window || 16,
24+
polynomial: options.polynomial || '0x3DF305DFB2A805'
25+
})
26+
27+
return toPull.duplex(rabin)
28+
}

src/importer/index.js

+1-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ const assert = require('assert')
88
const setImmediate = require('async/setImmediate')
99
const DAGBuilder = require('../builder')
1010
const createTreeBuilder = require('./tree-builder')
11-
12-
const chunkers = {
13-
fixed: require('../chunker/fixed-size')
14-
}
11+
const chunkers = require('../chunker')
1512

1613
const defaultOptions = {
1714
chunker: 'fixed',

test/chunker-rabin.js

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/* eslint-env mocha */
2+
'use strict'
3+
4+
const chunker = require('./../src/chunker/rabin')
5+
const chai = require('chai')
6+
chai.use(require('dirty-chai'))
7+
const expect = chai.expect
8+
const pull = require('pull-stream')
9+
const loadFixture = require('aegir/fixtures')
10+
11+
const rawFile = loadFixture('test/fixtures/1MiB.txt')
12+
13+
describe('chunker: rabin', function () {
14+
this.timeout(30000)
15+
16+
it('chunks non flat buffers', (done) => {
17+
const b1 = Buffer.alloc(2 * 256)
18+
const b2 = Buffer.alloc(1 * 256)
19+
const b3 = Buffer.alloc(5 * 256)
20+
21+
b1.fill('a')
22+
b2.fill('b')
23+
b3.fill('c')
24+
25+
pull(
26+
pull.values([b1, b2, b3]),
27+
chunker({minChunkSize: 48, avgChunkSize: 96, maxChunkSize: 192}),
28+
pull.collect((err, chunks) => {
29+
expect(err).to.not.exist()
30+
chunks.forEach((chunk) => {
31+
expect(chunk).to.have.length.gte(48)
32+
expect(chunk).to.have.length.lte(192)
33+
})
34+
done()
35+
})
36+
)
37+
})
38+
39+
it('256 KiB avg chunks of non scalar filesize', (done) => {
40+
const KiB256 = 262144
41+
let file = Buffer.concat([rawFile, Buffer.from('hello')])
42+
const opts = {
43+
minChunkSize: KiB256 / 3,
44+
avgChunkSize: KiB256,
45+
maxChunkSize: KiB256 + (KiB256 / 2)
46+
}
47+
pull(
48+
pull.values([file]),
49+
chunker(opts),
50+
pull.collect((err, chunks) => {
51+
expect(err).to.not.exist()
52+
53+
chunks.forEach((chunk) => {
54+
expect(chunk).to.have.length.gte(opts.minChunkSize)
55+
expect(chunk).to.have.length.lte(opts.maxChunkSize)
56+
})
57+
58+
done()
59+
})
60+
)
61+
})
62+
})

0 commit comments

Comments
 (0)