From d4128458908f543d696b3eb03ea71285a083cb1b Mon Sep 17 00:00:00 2001 From: Bernard Mordan Date: Mon, 4 Sep 2017 17:40:36 +0100 Subject: [PATCH 1/6] option for only-hash --- package.json | 2 +- src/builder/builder.js | 4 ++++ src/builder/reduce.js | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 843791d2..eb34ec84 100644 --- a/package.json +++ b/package.json @@ -91,4 +91,4 @@ "jbenet ", "nginnever " ] -} \ No newline at end of file +} diff --git a/src/builder/builder.js b/src/builder/builder.js index c9647bce..235b0451 100644 --- a/src/builder/builder.js +++ b/src/builder/builder.js @@ -60,6 +60,8 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options) waterfall([ (cb) => DAGNode.create(d.marshal(), [], options.hashAlg, cb), (node, cb) => { + if (options['only-hash']) return cb(null, node) + ipldResolver.put(node, { cid: new CID(node.multihash) }, (err) => cb(err, node)) @@ -101,6 +103,8 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options) }) }), pull.asyncMap((leaf, callback) => { + if (options['only-hash']) return callback(null, leaf) + ipldResolver.put(leaf.DAGNode, { cid: new CID(leaf.DAGNode.multihash) }, (err) => callback(err, leaf) diff --git a/src/builder/reduce.js b/src/builder/reduce.js index 83d43ac7..3277e1ce 100644 --- a/src/builder/reduce.js +++ b/src/builder/reduce.js @@ -34,6 +34,8 @@ module.exports = function (file, ipldResolver, options) { waterfall([ (cb) => DAGNode.create(f.marshal(), links, cb), (node, cb) => { + if (options['only-hash']) return cb(null, node) + ipldResolver.put(node, { cid: new CID(node.multihash) }, (err) => cb(err, node)) From c5c27d8219da670da261f249f966879d46702ad3 Mon Sep 17 00:00:00 2001 From: Bernard Mordan Date: Wed, 6 Sep 2017 10:59:02 +0100 Subject: [PATCH 2/6] Adds tests for only-hash option --only-hash will chunk and hash input - but not write it to disk --- package.json | 4 + src/builder/builder.js | 6 +- src/builder/reduce.js | 3 +- src/importer/dir-flat.js | 12 +- src/importer/dir-sharded.js | 10 +- src/importer/dir.js | 4 + src/importer/flat-to-shard.js | 14 +- src/importer/flush-tree.js | 326 +++++++++++++++++----------------- src/importer/index.js | 2 +- src/importer/tree-builder.js | 11 +- test/node.js | 1 + test/test-importer.js | 32 ++++ test/test-only-hash.js | 54 ++++++ 13 files changed, 289 insertions(+), 190 deletions(-) create mode 100644 test/test-only-hash.js diff --git a/package.json b/package.json index eb34ec84..a914b331 100644 --- a/package.json +++ b/package.json @@ -62,9 +62,12 @@ "ipld-dag-pb": "^0.11.0", "ipld-resolver": "^0.13.1", "left-pad": "^1.1.3", + "levelup": "^1.3.9", "lodash": "^4.17.4", "multihashes": "^0.4.9", "multihashing-async": "^0.4.6", + "multiplex": "^6.7.0", + "nan": "^2.7.0", "pull-batch": "^1.0.0", "pull-block": "^1.2.0", "pull-cat": "^1.1.11", @@ -76,6 +79,7 @@ "pull-stream": "^3.6.0", "pull-traverse": "^1.0.3", "pull-write": "^1.1.4", + "readable-stream": "^2.3.3", "sparse-array": "^1.3.1" }, "contributors": [ diff --git a/src/builder/builder.js b/src/builder/builder.js index 235b0451..927071f0 100644 --- a/src/builder/builder.js +++ b/src/builder/builder.js @@ -57,10 +57,12 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options) // 2. 
write it to the dag store const d = new UnixFS('directory') + waterfall([ (cb) => DAGNode.create(d.marshal(), [], options.hashAlg, cb), (node, cb) => { - if (options['only-hash']) return cb(null, node) + console.log('createAndStoreDir', options.onlyHash) + if (options.onlyHash) return cb(null, node) ipldResolver.put(node, { cid: new CID(node.multihash) @@ -103,7 +105,7 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options) }) }), pull.asyncMap((leaf, callback) => { - if (options['only-hash']) return callback(null, leaf) + if (options.onlyHash) return callback(null, leaf) ipldResolver.put(leaf.DAGNode, { cid: new CID(leaf.DAGNode.multihash) diff --git a/src/builder/reduce.js b/src/builder/reduce.js index 3277e1ce..9dee54b7 100644 --- a/src/builder/reduce.js +++ b/src/builder/reduce.js @@ -34,7 +34,8 @@ module.exports = function (file, ipldResolver, options) { waterfall([ (cb) => DAGNode.create(f.marshal(), links, cb), (node, cb) => { - if (options['only-hash']) return cb(null, node) + console.log('reducer', options.onlyHash) + if (options.onlyHash) return cb(null, node) ipldResolver.put(node, { cid: new CID(node.multihash) diff --git a/src/importer/dir-flat.js b/src/importer/dir-flat.js index ead6f5fe..59751c12 100644 --- a/src/importer/dir-flat.js +++ b/src/importer/dir-flat.js @@ -10,10 +10,9 @@ const DAGNode = dagPB.DAGNode const Dir = require('./dir') class DirFlat extends Dir { - constructor (props) { - super() + constructor (props, _options) { + super(props, _options) this._children = {} - Object.assign(this, props) } put (name, value, callback) { @@ -57,10 +56,13 @@ class DirFlat extends Dir { }) const dir = new UnixFS('directory') + waterfall( [ (callback) => DAGNode.create(dir.marshal(), links, callback), (node, callback) => { + if (this._options.onlyHash) return callback(null, node) + ipldResolver.put( node, { @@ -86,6 +88,6 @@ class DirFlat extends Dir { module.exports = createDirFlat -function createDirFlat (props) { - return new DirFlat(props) +function createDirFlat (props, _options) { + return new DirFlat(props, _options) } diff --git a/src/importer/dir-sharded.js b/src/importer/dir-sharded.js index 676361c6..6543180b 100644 --- a/src/importer/dir-sharded.js +++ b/src/importer/dir-sharded.js @@ -41,11 +41,9 @@ const defaultOptions = { class DirSharded extends Dir { constructor (props, _options) { - super() const options = Object.assign({}, defaultOptions, _options) - this._options = options + super(props, options) this._bucket = Bucket(options) - Object.assign(this, props) } put (name, value, callback) { @@ -87,8 +85,8 @@ class DirSharded extends Dir { module.exports = createDirSharded -function createDirSharded (props) { - return new DirSharded(props) +function createDirSharded (props, _options) { + return new DirSharded(props, _options) } function flush (options, bucket, path, ipldResolver, source, callback) { @@ -148,6 +146,8 @@ function flush (options, bucket, path, ipldResolver, source, callback) { [ (callback) => DAGNode.create(dir.marshal(), links, callback), (node, callback) => { + if (this._options.onlyHash) return callback(null, node) + ipldResolver.put( node, { diff --git a/src/importer/dir.js b/src/importer/dir.js index cd1a0e51..fda1f7bf 100644 --- a/src/importer/dir.js +++ b/src/importer/dir.js @@ -1,4 +1,8 @@ 'use strict' module.exports = class Dir { + constructor (props, _options) { + this._options = _options || {} + Object.assign(this, props) + } } diff --git a/src/importer/flat-to-shard.js b/src/importer/flat-to-shard.js index 
1b525e98..3bae8b4f 100644 --- a/src/importer/flat-to-shard.js +++ b/src/importer/flat-to-shard.js @@ -5,8 +5,8 @@ const DirSharded = require('./dir-sharded') module.exports = flatToShard -function flatToShard (child, dir, threshold, callback) { - maybeFlatToShardOne(dir, threshold, (err, newDir) => { +function flatToShard (child, dir, threshold, options, callback) { + maybeFlatToShardOne(dir, threshold, options, (err, newDir) => { if (err) { callback(err) return // early @@ -27,7 +27,7 @@ function flatToShard (child, dir, threshold, callback) { }, (callback) => { if (parent) { - flatToShard(newDir, parent, threshold, callback) + flatToShard(newDir, parent, threshold, options, callback) } else { callback(null, newDir) } @@ -40,15 +40,15 @@ function flatToShard (child, dir, threshold, callback) { }) } -function maybeFlatToShardOne (dir, threshold, callback) { +function maybeFlatToShardOne (dir, threshold, options, callback) { if (dir.flat && dir.directChildrenCount() >= threshold) { - definitelyShardOne(dir, callback) + definitelyShardOne(dir, options, callback) } else { callback(null, dir) } } -function definitelyShardOne (oldDir, callback) { +function definitelyShardOne (oldDir, options, callback) { const newDir = DirSharded({ root: oldDir.root, dir: true, @@ -57,7 +57,7 @@ function definitelyShardOne (oldDir, callback) { path: oldDir.path, dirty: oldDir.dirty, flat: false - }) + }, options) oldDir.eachChildSeries( (key, value, callback) => { diff --git a/src/importer/flush-tree.js b/src/importer/flush-tree.js index 936c44af..1174defa 100644 --- a/src/importer/flush-tree.js +++ b/src/importer/flush-tree.js @@ -1,163 +1,163 @@ -'use strict' - -const mh = require('multihashes') -const UnixFS = require('ipfs-unixfs') -const CID = require('cids') -const dagPB = require('ipld-dag-pb') -const mapValues = require('async/mapValues') -const waterfall = require('async/waterfall') -const DAGLink = dagPB.DAGLink -const DAGNode = dagPB.DAGNode - -module.exports = (files, ipldResolver, source, callback) => { - // 1) convert files to a tree - const fileTree = createTree(files) - - if (Object.keys(fileTree).length > 1) { - callback(new Error('detected more than one root')) - return - } - - if (Object.keys(fileTree).length === 0) { - return callback()// no dirs to be created - } - - // 2) create sizeIndex - const sizeIndex = createSizeIndex(files) - - // 3) bottom up flushing - traverse(fileTree, sizeIndex, null, ipldResolver, source, callback) -} - -/* - * createTree - * - * received an array of files with the format: - * { - * path: // full path - * multihash: // multihash of the dagNode - * size: // cumulative size - * } - * - * returns a JSON object that represents a tree where branches are the paths - * and the leaves are objects with file names and respective multihashes, such - * as: - * { - * foo: { - * bar: { - * baz.txt: - * } - * } - * } - */ -function createTree (files) { - const fileTree = {} - - files.forEach((file) => { - let splitted = file.path.split('/') - if (splitted.length === 1) { - return // adding just one file - } - if (splitted[0] === '') { - splitted = splitted.slice(1) - } - var tmpTree = fileTree - - for (var i = 0; i < splitted.length; i++) { - if (!tmpTree[splitted[i]]) { - tmpTree[splitted[i]] = {} - } - if (i === splitted.length - 1) { - tmpTree[splitted[i]] = file.multihash - } else { - tmpTree = tmpTree[splitted[i]] - } - } - }) - - return fileTree -} - -/* - * create a size index that goes like: - * { : } - */ -function createSizeIndex (files) { - const sizeIndex = {} - 
- files.forEach((file) => { - sizeIndex[mh.toB58String(file.multihash)] = file.size - }) - - return sizeIndex -} - -/* - * expand the branches recursively (depth first), flush them first - * and then traverse through the bottoum up, flushing everynode - * - * Algorithm tl;dr; - * create a dirNode - * Object.keys - * If the value is an Object - * create a dir Node - * Object.keys - * Once finished, add the result as a link to the dir node - * If the value is not an object - * add as a link to the dirNode - */ -function traverse (tree, sizeIndex, path, ipldResolver, source, done) { - mapValues(tree, (node, key, cb) => { - if (isLeaf(node)) { - return cb(null, node) - } - - traverse(node, sizeIndex, path ? `${path}/${key}` : key, ipldResolver, source, cb) - }, (err, tree) => { - if (err) { - return done(err) - } - - // at this stage, all keys are multihashes - // create a dir node - // add all the multihashes as links - // return this new node multihash - - const keys = Object.keys(tree) - const dir = new UnixFS('directory') - const links = keys.map((key) => { - const b58mh = mh.toB58String(tree[key]) - return new DAGLink(key, sizeIndex[b58mh], tree[key]) - }) - - waterfall([ - (cb) => DAGNode.create(dir.marshal(), links, cb), - (node, cb) => { - sizeIndex[mh.toB58String(node.multihash)] = node.size - - ipldResolver.put(node, { - cid: new CID(node.multihash) - }, (err) => cb(err, node)) - } - ], (err, node) => { - if (err) { - source.push(new Error('failed to store dirNode')) - return done(err) - } - - if (path) { - source.push({ - path: path, - multihash: node.multihash, - size: node.size - }) - } - - done(null, node.multihash) - }) - }) -} - -function isLeaf (value) { - return !(typeof value === 'object' && !Buffer.isBuffer(value)) -} +// 'use strict' +// +// const mh = require('multihashes') +// const UnixFS = require('ipfs-unixfs') +// const CID = require('cids') +// const dagPB = require('ipld-dag-pb') +// const mapValues = require('async/mapValues') +// const waterfall = require('async/waterfall') +// const DAGLink = dagPB.DAGLink +// const DAGNode = dagPB.DAGNode +// +// module.exports = (files, ipldResolver, source, callback) => { +// // 1) convert files to a tree +// const fileTree = createTree(files) +// +// if (Object.keys(fileTree).length > 1) { +// callback(new Error('detected more than one root')) +// return +// } +// +// if (Object.keys(fileTree).length === 0) { +// return callback()// no dirs to be created +// } +// +// // 2) create sizeIndex +// const sizeIndex = createSizeIndex(files) +// +// // 3) bottom up flushing +// traverse(fileTree, sizeIndex, null, ipldResolver, source, callback) +// } +// +// /* +// * createTree +// * +// * received an array of files with the format: +// * { +// * path: // full path +// * multihash: // multihash of the dagNode +// * size: // cumulative size +// * } +// * +// * returns a JSON object that represents a tree where branches are the paths +// * and the leaves are objects with file names and respective multihashes, such +// * as: +// * { +// * foo: { +// * bar: { +// * baz.txt: +// * } +// * } +// * } +// */ +// function createTree (files) { +// const fileTree = {} +// +// files.forEach((file) => { +// let splitted = file.path.split('/') +// if (splitted.length === 1) { +// return // adding just one file +// } +// if (splitted[0] === '') { +// splitted = splitted.slice(1) +// } +// var tmpTree = fileTree +// +// for (var i = 0; i < splitted.length; i++) { +// if (!tmpTree[splitted[i]]) { +// tmpTree[splitted[i]] = {} +// } +// if (i === 
splitted.length - 1) { +// tmpTree[splitted[i]] = file.multihash +// } else { +// tmpTree = tmpTree[splitted[i]] +// } +// } +// }) +// +// return fileTree +// } +// +// /* +// * create a size index that goes like: +// * { : } +// */ +// function createSizeIndex (files) { +// const sizeIndex = {} +// +// files.forEach((file) => { +// sizeIndex[mh.toB58String(file.multihash)] = file.size +// }) +// +// return sizeIndex +// } +// +// /* +// * expand the branches recursively (depth first), flush them first +// * and then traverse through the bottoum up, flushing everynode +// * +// * Algorithm tl;dr; +// * create a dirNode +// * Object.keys +// * If the value is an Object +// * create a dir Node +// * Object.keys +// * Once finished, add the result as a link to the dir node +// * If the value is not an object +// * add as a link to the dirNode +// */ +// function traverse (tree, sizeIndex, path, ipldResolver, source, done) { +// mapValues(tree, (node, key, cb) => { +// if (isLeaf(node)) { +// return cb(null, node) +// } +// +// traverse(node, sizeIndex, path ? `${path}/${key}` : key, ipldResolver, source, cb) +// }, (err, tree) => { +// if (err) { +// return done(err) +// } +// +// // at this stage, all keys are multihashes +// // create a dir node +// // add all the multihashes as links +// // return this new node multihash +// +// const keys = Object.keys(tree) +// const dir = new UnixFS('directory') +// const links = keys.map((key) => { +// const b58mh = mh.toB58String(tree[key]) +// return new DAGLink(key, sizeIndex[b58mh], tree[key]) +// }) +// +// waterfall([ +// (cb) => DAGNode.create(dir.marshal(), links, cb), +// (node, cb) => { +// sizeIndex[mh.toB58String(node.multihash)] = node.size +// +// ipldResolver.put(node, { +// cid: new CID(node.multihash) +// }, (err) => cb(err, node)) +// } +// ], (err, node) => { +// if (err) { +// source.push(new Error('failed to store dirNode')) +// return done(err) +// } +// +// if (path) { +// source.push({ +// path: path, +// multihash: node.multihash, +// size: node.size +// }) +// } +// +// done(null, node.multihash) +// }) +// }) +// } +// +// function isLeaf (value) { +// return !(typeof value === 'object' && !Buffer.isBuffer(value)) +// } diff --git a/src/importer/index.js b/src/importer/index.js index 8ae157a7..f1dbdd8b 100644 --- a/src/importer/index.js +++ b/src/importer/index.js @@ -64,7 +64,7 @@ module.exports = function (ipldResolver, _options) { return node }), treeBuilderStream - ) + ) return { sink: entry.sink, diff --git a/src/importer/tree-builder.js b/src/importer/tree-builder.js index 22c17906..b3ef1d09 100644 --- a/src/importer/tree-builder.js +++ b/src/importer/tree-builder.js @@ -14,14 +14,14 @@ module.exports = createTreeBuilder const defaultOptions = { wrap: false, - shardSplitThreshold: 1000 + shardSplitThreshold: 1000, + onlyHash: true } function createTreeBuilder (ipldResolver, _options) { const options = Object.assign({}, defaultOptions, _options) const queue = createQueue(consumeQueue, 1) - // returned stream let stream = createStream() @@ -32,7 +32,7 @@ function createTreeBuilder (ipldResolver, _options) { dir: true, dirty: false, flat: true - }) + }, options) return { flush: flushRoot, @@ -101,7 +101,6 @@ function createTreeBuilder (ipldResolver, _options) { currentPath += '/' } currentPath += pathElem - const last = (index === lastIndex) parent.dirty = true parent.multihash = null @@ -110,7 +109,7 @@ function createTreeBuilder (ipldResolver, _options) { if (last) { waterfall([ (callback) => parent.put(pathElem, elem, 
callback), - (callback) => flatToShard(null, parent, options.shardSplitThreshold, callback), + (callback) => flatToShard(null, parent, options.shardSplitThreshold, options, callback), (newRoot, callback) => { tree = newRoot callback() @@ -131,7 +130,7 @@ function createTreeBuilder (ipldResolver, _options) { path: currentPath, dirty: true, flat: true - }) + }, options) } const parentDir = parent parent = dir diff --git a/test/node.js b/test/node.js index 01c7c0c5..3169682b 100644 --- a/test/node.js +++ b/test/node.js @@ -53,4 +53,5 @@ describe('IPFS UnixFS Engine', () => { require('./test-nested-dir-import-export')(repo) require('./test-dirbuilder-sharding')(repo) require('./test-dag-api') + require('./test-only-hash')(repo) }) diff --git a/test/test-importer.js b/test/test-importer.js index 4fedead2..f18241b5 100644 --- a/test/test-importer.js +++ b/test/test-importer.js @@ -10,6 +10,7 @@ const expect = chai.expect const BlockService = require('ipfs-block-service') const pull = require('pull-stream') const mh = require('multihashes') +const CID = require('cids') const IPLDResolver = require('ipld-resolver') const loadFixture = require('aegir/fixtures') @@ -417,6 +418,37 @@ module.exports = (repo) => { } } }) + + it.only('will not write to disk if passed --only-hash option', (done) => { + const content = String(Math.random() + Date.now()) + const inputFile = { + path: content + '.txt', + content: Buffer.from(content) + } + + const options = { + onlyHash: true + } + + const onCollected = (err, files) => { + if (err) return done(err) + + const file = files[0] + expect(file).to.exist() + + ipldResolver.get(new CID(file.multihash), (err, res) => { + expect(err).to.exist() + expect(err.code).to.equal('ENOENT') + done() + }) + } + + pull( + pull.values([inputFile]), + importer(ipldResolver, options), + pull.collect(onCollected) + ) + }) }) }) } diff --git a/test/test-only-hash.js b/test/test-only-hash.js new file mode 100644 index 00000000..b7a3f5b6 --- /dev/null +++ b/test/test-only-hash.js @@ -0,0 +1,54 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const BlockService = require('ipfs-block-service') +const pull = require('pull-stream') +const IPLDResolver = require('ipld-resolver') +const CID = require('cids') +const createBuilder = require('../src/builder') +const FixedSizeChunker = require('../src/chunker/fixed-size') + +module.exports = (repo) => { + describe('builder', () => { + let ipldResolver + + before(() => { + const bs = new BlockService(repo) + ipldResolver = new IPLDResolver(bs) + }) + + it('will only chunk and hash if passed an --only-hash option', (done) => { + const onCollected = (err, nodes) => { + if (err) return done(err) + + const node = nodes[0] + expect(node).to.exist() + + ipldResolver.get(new CID(node.multihash), (err, res) => { + expect(err).to.exist() + expect(err.code).to.equal('ENOENT') + done() + }) + } + + const content = String(Math.random() + Date.now()) + const inputFile = { + path: content + '.txt', + content: Buffer.from(content) + } + + const options = { + onlyHash: true + } + + pull( + pull.values([inputFile]), + createBuilder(FixedSizeChunker, ipldResolver, options), + pull.collect(onCollected) + ) + }) + }) +} From d6e868b5eee7b6c1f479129c54e74f74165f4854 Mon Sep 17 00:00:00 2001 From: Bernard Mordan Date: Wed, 6 Sep 2017 11:03:39 +0100 Subject: [PATCH 3/6] removes console.logging --- src/builder/builder.js | 1 - src/builder/reduce.js | 1 - 2 files changed, 2 
deletions(-) diff --git a/src/builder/builder.js b/src/builder/builder.js index 927071f0..7cdd6c8f 100644 --- a/src/builder/builder.js +++ b/src/builder/builder.js @@ -61,7 +61,6 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options) waterfall([ (cb) => DAGNode.create(d.marshal(), [], options.hashAlg, cb), (node, cb) => { - console.log('createAndStoreDir', options.onlyHash) if (options.onlyHash) return cb(null, node) ipldResolver.put(node, { diff --git a/src/builder/reduce.js b/src/builder/reduce.js index 9dee54b7..3b7ea1c9 100644 --- a/src/builder/reduce.js +++ b/src/builder/reduce.js @@ -34,7 +34,6 @@ module.exports = function (file, ipldResolver, options) { waterfall([ (cb) => DAGNode.create(f.marshal(), links, cb), (node, cb) => { - console.log('reducer', options.onlyHash) if (options.onlyHash) return cb(null, node) ipldResolver.put(node, { From 848f7941e94b5e1abfb0f6948c4c9d8fbd21a82e Mon Sep 17 00:00:00 2001 From: Bernard Mordan Date: Wed, 6 Sep 2017 11:07:19 +0100 Subject: [PATCH 4/6] restores src/importer/flush-tree (which is not actually ever used or executed) --- src/importer/flush-tree.js | 326 ++++++++++++++++++------------------- 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/src/importer/flush-tree.js b/src/importer/flush-tree.js index 1174defa..936c44af 100644 --- a/src/importer/flush-tree.js +++ b/src/importer/flush-tree.js @@ -1,163 +1,163 @@ -// 'use strict' -// -// const mh = require('multihashes') -// const UnixFS = require('ipfs-unixfs') -// const CID = require('cids') -// const dagPB = require('ipld-dag-pb') -// const mapValues = require('async/mapValues') -// const waterfall = require('async/waterfall') -// const DAGLink = dagPB.DAGLink -// const DAGNode = dagPB.DAGNode -// -// module.exports = (files, ipldResolver, source, callback) => { -// // 1) convert files to a tree -// const fileTree = createTree(files) -// -// if (Object.keys(fileTree).length > 1) { -// callback(new Error('detected more than one root')) -// return -// } -// -// if (Object.keys(fileTree).length === 0) { -// return callback()// no dirs to be created -// } -// -// // 2) create sizeIndex -// const sizeIndex = createSizeIndex(files) -// -// // 3) bottom up flushing -// traverse(fileTree, sizeIndex, null, ipldResolver, source, callback) -// } -// -// /* -// * createTree -// * -// * received an array of files with the format: -// * { -// * path: // full path -// * multihash: // multihash of the dagNode -// * size: // cumulative size -// * } -// * -// * returns a JSON object that represents a tree where branches are the paths -// * and the leaves are objects with file names and respective multihashes, such -// * as: -// * { -// * foo: { -// * bar: { -// * baz.txt: -// * } -// * } -// * } -// */ -// function createTree (files) { -// const fileTree = {} -// -// files.forEach((file) => { -// let splitted = file.path.split('/') -// if (splitted.length === 1) { -// return // adding just one file -// } -// if (splitted[0] === '') { -// splitted = splitted.slice(1) -// } -// var tmpTree = fileTree -// -// for (var i = 0; i < splitted.length; i++) { -// if (!tmpTree[splitted[i]]) { -// tmpTree[splitted[i]] = {} -// } -// if (i === splitted.length - 1) { -// tmpTree[splitted[i]] = file.multihash -// } else { -// tmpTree = tmpTree[splitted[i]] -// } -// } -// }) -// -// return fileTree -// } -// -// /* -// * create a size index that goes like: -// * { : } -// */ -// function createSizeIndex (files) { -// const sizeIndex = {} -// -// files.forEach((file) => { -// 
sizeIndex[mh.toB58String(file.multihash)] = file.size -// }) -// -// return sizeIndex -// } -// -// /* -// * expand the branches recursively (depth first), flush them first -// * and then traverse through the bottoum up, flushing everynode -// * -// * Algorithm tl;dr; -// * create a dirNode -// * Object.keys -// * If the value is an Object -// * create a dir Node -// * Object.keys -// * Once finished, add the result as a link to the dir node -// * If the value is not an object -// * add as a link to the dirNode -// */ -// function traverse (tree, sizeIndex, path, ipldResolver, source, done) { -// mapValues(tree, (node, key, cb) => { -// if (isLeaf(node)) { -// return cb(null, node) -// } -// -// traverse(node, sizeIndex, path ? `${path}/${key}` : key, ipldResolver, source, cb) -// }, (err, tree) => { -// if (err) { -// return done(err) -// } -// -// // at this stage, all keys are multihashes -// // create a dir node -// // add all the multihashes as links -// // return this new node multihash -// -// const keys = Object.keys(tree) -// const dir = new UnixFS('directory') -// const links = keys.map((key) => { -// const b58mh = mh.toB58String(tree[key]) -// return new DAGLink(key, sizeIndex[b58mh], tree[key]) -// }) -// -// waterfall([ -// (cb) => DAGNode.create(dir.marshal(), links, cb), -// (node, cb) => { -// sizeIndex[mh.toB58String(node.multihash)] = node.size -// -// ipldResolver.put(node, { -// cid: new CID(node.multihash) -// }, (err) => cb(err, node)) -// } -// ], (err, node) => { -// if (err) { -// source.push(new Error('failed to store dirNode')) -// return done(err) -// } -// -// if (path) { -// source.push({ -// path: path, -// multihash: node.multihash, -// size: node.size -// }) -// } -// -// done(null, node.multihash) -// }) -// }) -// } -// -// function isLeaf (value) { -// return !(typeof value === 'object' && !Buffer.isBuffer(value)) -// } +'use strict' + +const mh = require('multihashes') +const UnixFS = require('ipfs-unixfs') +const CID = require('cids') +const dagPB = require('ipld-dag-pb') +const mapValues = require('async/mapValues') +const waterfall = require('async/waterfall') +const DAGLink = dagPB.DAGLink +const DAGNode = dagPB.DAGNode + +module.exports = (files, ipldResolver, source, callback) => { + // 1) convert files to a tree + const fileTree = createTree(files) + + if (Object.keys(fileTree).length > 1) { + callback(new Error('detected more than one root')) + return + } + + if (Object.keys(fileTree).length === 0) { + return callback()// no dirs to be created + } + + // 2) create sizeIndex + const sizeIndex = createSizeIndex(files) + + // 3) bottom up flushing + traverse(fileTree, sizeIndex, null, ipldResolver, source, callback) +} + +/* + * createTree + * + * received an array of files with the format: + * { + * path: // full path + * multihash: // multihash of the dagNode + * size: // cumulative size + * } + * + * returns a JSON object that represents a tree where branches are the paths + * and the leaves are objects with file names and respective multihashes, such + * as: + * { + * foo: { + * bar: { + * baz.txt: + * } + * } + * } + */ +function createTree (files) { + const fileTree = {} + + files.forEach((file) => { + let splitted = file.path.split('/') + if (splitted.length === 1) { + return // adding just one file + } + if (splitted[0] === '') { + splitted = splitted.slice(1) + } + var tmpTree = fileTree + + for (var i = 0; i < splitted.length; i++) { + if (!tmpTree[splitted[i]]) { + tmpTree[splitted[i]] = {} + } + if (i === splitted.length - 1) { + 
tmpTree[splitted[i]] = file.multihash + } else { + tmpTree = tmpTree[splitted[i]] + } + } + }) + + return fileTree +} + +/* + * create a size index that goes like: + * { : } + */ +function createSizeIndex (files) { + const sizeIndex = {} + + files.forEach((file) => { + sizeIndex[mh.toB58String(file.multihash)] = file.size + }) + + return sizeIndex +} + +/* + * expand the branches recursively (depth first), flush them first + * and then traverse through the bottoum up, flushing everynode + * + * Algorithm tl;dr; + * create a dirNode + * Object.keys + * If the value is an Object + * create a dir Node + * Object.keys + * Once finished, add the result as a link to the dir node + * If the value is not an object + * add as a link to the dirNode + */ +function traverse (tree, sizeIndex, path, ipldResolver, source, done) { + mapValues(tree, (node, key, cb) => { + if (isLeaf(node)) { + return cb(null, node) + } + + traverse(node, sizeIndex, path ? `${path}/${key}` : key, ipldResolver, source, cb) + }, (err, tree) => { + if (err) { + return done(err) + } + + // at this stage, all keys are multihashes + // create a dir node + // add all the multihashes as links + // return this new node multihash + + const keys = Object.keys(tree) + const dir = new UnixFS('directory') + const links = keys.map((key) => { + const b58mh = mh.toB58String(tree[key]) + return new DAGLink(key, sizeIndex[b58mh], tree[key]) + }) + + waterfall([ + (cb) => DAGNode.create(dir.marshal(), links, cb), + (node, cb) => { + sizeIndex[mh.toB58String(node.multihash)] = node.size + + ipldResolver.put(node, { + cid: new CID(node.multihash) + }, (err) => cb(err, node)) + } + ], (err, node) => { + if (err) { + source.push(new Error('failed to store dirNode')) + return done(err) + } + + if (path) { + source.push({ + path: path, + multihash: node.multihash, + size: node.size + }) + } + + done(null, node.multihash) + }) + }) +} + +function isLeaf (value) { + return !(typeof value === 'object' && !Buffer.isBuffer(value)) +} From a3e694bc1b4012e81209f0d9f6e568566ac05db8 Mon Sep 17 00:00:00 2001 From: Bernard Mordan Date: Wed, 6 Sep 2017 12:30:38 +0100 Subject: [PATCH 5/6] Removes .only on a test and changes the option name. 
Adds reference to 'onlyHash' in the read me --- README.md | 1 + src/importer/tree-builder.js | 2 +- test/browser.js | 1 + test/test-importer.js | 2 +- test/test-only-hash.js | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 79d553bf..a6ea85f7 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb - `dirBuilder` (object): the options for the directory builder - `hamt` (object): the options for the HAMT sharded directory builder - bits (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT +- `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk ### Exporter diff --git a/src/importer/tree-builder.js b/src/importer/tree-builder.js index b3ef1d09..c04cb0ac 100644 --- a/src/importer/tree-builder.js +++ b/src/importer/tree-builder.js @@ -15,7 +15,7 @@ module.exports = createTreeBuilder const defaultOptions = { wrap: false, shardSplitThreshold: 1000, - onlyHash: true + onlyHash: false } function createTreeBuilder (ipldResolver, _options) { diff --git a/test/browser.js b/test/browser.js index 4d3e10a0..d876eb80 100644 --- a/test/browser.js +++ b/test/browser.js @@ -52,4 +52,5 @@ describe('IPFS data importing tests on the Browser', function () { require('./test-hash-parity-with-go-ipfs')(repo) require('./test-nested-dir-import-export')(repo) require('./test-dirbuilder-sharding')(repo) + require('./test-only-hash')(repo) }) diff --git a/test/test-importer.js b/test/test-importer.js index f18241b5..79cffbd1 100644 --- a/test/test-importer.js +++ b/test/test-importer.js @@ -419,7 +419,7 @@ module.exports = (repo) => { } }) - it.only('will not write to disk if passed --only-hash option', (done) => { + it('will not write to disk if passed "onlyHash" option', (done) => { const content = String(Math.random() + Date.now()) const inputFile = { path: content + '.txt', diff --git a/test/test-only-hash.js b/test/test-only-hash.js index b7a3f5b6..13261f5a 100644 --- a/test/test-only-hash.js +++ b/test/test-only-hash.js @@ -20,7 +20,7 @@ module.exports = (repo) => { ipldResolver = new IPLDResolver(bs) }) - it('will only chunk and hash if passed an --only-hash option', (done) => { + it('will only chunk and hash if passed an "onlyHash" option', (done) => { const onCollected = (err, nodes) => { if (err) return done(err) From 5cce30e3caa20d7829476cc7d6d6ee8a3aaa3bb4 Mon Sep 17 00:00:00 2001 From: Alan Shaw Date: Wed, 6 Sep 2017 13:57:29 +0100 Subject: [PATCH 6/6] Fix the tests --- src/importer/dir-sharded.js | 2 +- test/browser.js | 2 +- test/node.js | 2 +- test/{test-only-hash.js => test-builder-only-hash.js} | 1 - test/test-importer.js | 1 - 5 files changed, 3 insertions(+), 5 deletions(-) rename test/{test-only-hash.js => test-builder-only-hash.js} (96%) diff --git a/src/importer/dir-sharded.js b/src/importer/dir-sharded.js index 6543180b..f120614b 100644 --- a/src/importer/dir-sharded.js +++ b/src/importer/dir-sharded.js @@ -146,7 +146,7 @@ function flush (options, bucket, path, ipldResolver, source, callback) { [ (callback) => DAGNode.create(dir.marshal(), links, callback), (node, callback) => { - if (this._options.onlyHash) return callback(null, node) + if (options.onlyHash) return callback(null, node) ipldResolver.put( node, diff --git a/test/browser.js b/test/browser.js index d876eb80..c2bc0c2d 100644 --- a/test/browser.js +++ b/test/browser.js @@ -52,5 +52,5 @@ describe('IPFS data importing tests on the Browser', 
function () { require('./test-hash-parity-with-go-ipfs')(repo) require('./test-nested-dir-import-export')(repo) require('./test-dirbuilder-sharding')(repo) - require('./test-only-hash')(repo) + require('./test-builder-only-hash')(repo) }) diff --git a/test/node.js b/test/node.js index 3169682b..4790b05b 100644 --- a/test/node.js +++ b/test/node.js @@ -53,5 +53,5 @@ describe('IPFS UnixFS Engine', () => { require('./test-nested-dir-import-export')(repo) require('./test-dirbuilder-sharding')(repo) require('./test-dag-api') - require('./test-only-hash')(repo) + require('./test-builder-only-hash')(repo) }) diff --git a/test/test-only-hash.js b/test/test-builder-only-hash.js similarity index 96% rename from test/test-only-hash.js rename to test/test-builder-only-hash.js index 13261f5a..eb48de8e 100644 --- a/test/test-only-hash.js +++ b/test/test-builder-only-hash.js @@ -29,7 +29,6 @@ module.exports = (repo) => { ipldResolver.get(new CID(node.multihash), (err, res) => { expect(err).to.exist() - expect(err.code).to.equal('ENOENT') done() }) } diff --git a/test/test-importer.js b/test/test-importer.js index 97df1c45..40afab90 100644 --- a/test/test-importer.js +++ b/test/test-importer.js @@ -439,7 +439,6 @@ module.exports = (repo) => { ipldResolver.get(new CID(file.multihash), (err, res) => { expect(err).to.exist() - expect(err.code).to.equal('ENOENT') done() }) }
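
Taken together, this series threads a single `onlyHash` option (documented in the README hunk above as "Only chunk and hash - do not write to disk") from the importer's entry point down through the builder, the reducer, the tree builder and the directory flushers, each of which now returns the created DAGNode early instead of calling `ipldResolver.put`. Below is a minimal usage sketch adapted from the tests added in this series; it is illustrative only, and assumes an already-initialised `repo` and that the module exposes `importer` as in this repo's public API — neither is defined by the patches themselves.

    'use strict'

    const pull = require('pull-stream')
    const BlockService = require('ipfs-block-service')
    const IPLDResolver = require('ipld-resolver')
    const importer = require('ipfs-unixfs-engine').importer

    // `repo` is assumed to be an initialised IPFS repo, as in the test harness
    const ipldResolver = new IPLDResolver(new BlockService(repo))

    pull(
      // a single in-memory file to import
      pull.values([{ path: 'hello.txt', content: Buffer.from('hello') }]),
      // onlyHash: chunk and hash the input, but skip every ipldResolver.put,
      // so nothing is written to the block store
      importer(ipldResolver, { onlyHash: true }),
      pull.collect((err, files) => {
        if (err) throw err
        // the multihash is computed even though no blocks were persisted
        console.log(files[0].multihash)
      })
    )

As the tests in this series assert, a subsequent `ipldResolver.get(new CID(file.multihash), ...)` then fails, confirming that the input was hashed but never written to the block store.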