I'm @dominictarr
I code for fybe.com and MadScience
I like streams.
-
An Abstraction of IO.
-
"chunks of data in time series with back pressure"
-
improve latency
-
reduce memory footprint
-
expand possibilities
-
Real-Time
"We should have some ways of connecting programs like garden hose -- screw in another segment when it becomes necessary to massage data in another way.
This is the way of IO also."
-- Doug McIlroy. October 11, 1964
[photo of Colossus]
fs.readFile(file, function (err, data) {
//this is using streams under the hood.
//it's just buffering!
})The Real Question:
Do I want to use streams WELL, or BADLY ?
-
writable / readable
- files, persistence, logging, news-feeds.
-
through
- compression
- encryption
- (de)serialization
- buffering
- "effects"
-
duplex
- communication.
- http, tcp, stdio (if you count both sides)
- connecting data replication, rpc, event emitter, multiplexing.
(readable) (writable)
source.emit('data', data) ----> dest.write(data)
source.emit('end') ------------> dest.end()
source.pause() <---------------- dest.write()===false
source.resume() <--------------- dest.emit('drain')
source.emit('close') ----------> dest.destroy()
dest|source.emit('end'|'close'|'error')
|
`----------------------------> cleanup() -
one way: readable || writable ** readable ** writable
-
two way: writable && readable ** through / filter ** duplex
-
Through is like a meat-grinder. (meat goes in, sausage comes out)
-
Duplex is like a telephone. (two entities communicate)
user--,
|
v
,-------------------.
| write(), end() |
| |
|emits 'end', 'data'|
`-------------------`
|
|
user<-/
///////////////////////
| |
,--------------------, |
| | |
user ------->| write(), end() ======> S O M E |
| | |
| | T H I N G |
user <-------|emits 'data', 'end' <== |
| | E L S E |
`--------------------` |
| |
| |
\\\\\\\\\\\\\\\\\\\\\\!
readable.pipe(through).pipe(writable)example
fs.createReadStream(file)
  .pipe(zlib.createGzip())
.pipe(fs.createWriteStream(file + '.gz')) duplex.pipe(duplex2).pipe(duplex)Example
var dnode = require('dnode')
var sock = net.connect(port)
sock.pipe(dnode({
status: function (cb) {
cb(null, "streamin'")
}})).pipe(sock)duplex
.pipe(through1)
.pipe(duplex2)
.pipe(through2)
.pipe(duplex)stream = net.connect(PORT)
stream
.pipe(es.split())
.pipe(es.parse())
.pipe(rpcStream())
.pipe(es.stringify())
.pipe(stream)through.pipe(through2).pipe(through)Example:
var zip = zlib.createGzip()
var unzip = zlib.createGunzip()
zip.pipe(unzip).pipe(zip)
Will cause an infinite loop, or a dead-lock.
MyStream.prototype.write = function(data){
if(this.paused) {
this.buffer.push(data)
return false
}
this.emit('data', parse(data))
return true; //THIS IS WRONG!
}MyStream.prototype.write = function(data){
if(this.paused) {
this.buffer.push(data)
return false
}
//emitted events can trigger state changes!
this.emit('data', parse(data))
//vvv JUST CHANGE THIS vvv
return !this.paused; //CORRECT!
}stream.on('end', function onEnd () {
//where is this in the order of 'end' listeners?
stream.destroy()
})
stream.destroy = function () {
stream.emit('close')
}If onEnd is the first listener, then 'close' will
be emitted before the rest of the 'end' listeners are called!
stream.on('end', function onEnd () {
//where is this in the order of 'end' listeners?
process.nextTick(function () {
stream.destroy()
})
})-
Change state BEFORE emitting events.
-
Take care with listeners that emit events. Be sure they are the last listener, or use nextTick
but we can use programming to make programming easy.
Stream Base Classes
readable stream from async function.
var from = require('from')
from(function (i, next) {
//do whatever,
this.emit('data', chunk)
//call next when you are finished for i
if(i > 100) //or whatever
this.emit('end')
//time to call this function again!
next()
})create readable stream from array.
var from = require('from')
//will emit 1, 2, 3 as 'data', then 'end'.
var stream = from([1, 2, 3])convert an async function into a through stream.
var map = require('map-stream')
var stream =
map(function (data, next) {
  //transform data asynchronously!
process.nextTick(function () {
next(null, data)
})
})var through = require('through')
var stream =
through(function (data) {
//optionally transform the stream...
this.emit('data', data)
},
function (end) {
this.emit('end')
})var through = require('through')
var stream =
through(function (data) {
this.queue(data)
},
function (end) {
this.queue(null) //null means 'end'
})readable & writable, with buffering on pause.
var emitter = new EventEmitter() //whatever
var duplex = require('duplex')
var d = duplex()
.on('_data', function (data) {
emitter.update(data)
})
emitter.on('update', function (update) {
d._data(update)
})
# Testing Streams...
``` js
var spec = require('stream-spec')
var tester = require('stream-tester')
spec(myStream)
.through({strict: true})
.validateOnExit()
tester.random() //random data
.pipe(myStream) //pipe through my stream
.pipe(tester.pauser()) //test pauses are handled properly.
//base classes are already well tested!substack's stream handbook (in progress) https://github.com/substack/stream-handbook
stream-spec has more documentation https://github.com/dominictarr/stream-spec