
Commit 73b72f2

davisjam authored and MayaLekova committed
fs: partition readFile against pool exhaustion
Problem: Node implements fs.readFile as:
- a call to stat, then
- a C++ -> libuv request to read the entire file using the stat size

Why is this bad? The effect is to place on the libuv threadpool a
potentially-large read request, occupying the libuv thread until it
completes. While readFile certainly requires buffering the entire file
contents, it can partition the read into smaller buffers (as is done on
other read paths) along the way to avoid threadpool exhaustion.

If the file is relatively large or stored on a slow medium, reading the
entire file in one shot seems particularly harmful, and presents a
possible DoS vector.

Solution: Partition the read into multiple smaller requests.

Considerations:

1. Correctness

I don't think partitioning the read like this raises any additional risk
of read-write races on the FS. If the application is concurrently
readFile'ing and modifying the file, it will already see funny behavior.
Though libuv uses preadv where available, this doesn't guarantee read
atomicity in the presence of concurrent writes.

2. Performance

Downside: Partitioning means that a single large readFile will require
many "out and back" requests to libuv, introducing overhead.

Upside: In between each "out and back", other work pending on the
threadpool can take a turn.

In short, although partitioning will slow down a large request, it will
lead to better throughput if the threadpool is handling more than one
type of request.

Fixes: nodejs#17047
PR-URL: nodejs#17054
Reviewed-By: Benjamin Gruenbaum <[email protected]>
Reviewed-By: Tiancheng "Timothy" Gu <[email protected]>
Reviewed-By: Gireesh Punathil <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Matteo Collina <[email protected]>
Reviewed-By: Sakthipriyan Vairamani <[email protected]>
Reviewed-By: Ruben Bridgewater <[email protected]>
1 parent 23e845c commit 73b72f2
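To make the mechanism concrete, here is a minimal userland sketch of the partitioning idea, not the committed implementation (which lives in ReadFileContext in lib/fs.js). The names kChunkSize and readFilePartitioned are hypothetical, and the 512 KiB chunk size is an assumed stand-in for the real kReadFileBufferLength constant:

'use strict';
const fs = require('fs');

// Hypothetical chunk size for illustration; the real constant is
// kReadFileBufferLength in lib/fs.js.
const kChunkSize = 512 * 1024;

// Sketch of a partitioned read: fetch `size` bytes from `fd` in
// kChunkSize pieces rather than one large request, so other pending
// threadpool work can interleave between chunks.
function readFilePartitioned(fd, size, cb) {
  const buffer = Buffer.allocUnsafe(size);
  let pos = 0;

  function readChunk() {
    const length = Math.min(kChunkSize, size - pos);
    fs.read(fd, buffer, pos, length, pos, (err, bytesRead) => {
      if (err)
        return cb(err);
      pos += bytesRead;
      if (bytesRead === 0 || pos >= size)  // EOF or done
        return cb(null, buffer.slice(0, pos));
      readChunk();  // next partition
    });
  }
  readChunk();
}

Each fs.read round trip returns to the event loop before the next chunk is requested, so a queued zlib job (for example) can claim a threadpool slot between chunks; this is the throughput effect the benchmark below measures.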

File tree: 5 files changed, +159 -9 lines


benchmark/fs/readfile-partitioned.js

Lines changed: 86 additions & 0 deletions (new file)

// Submit a mix of short and long jobs to the threadpool.
// Report total job throughput.
// If we partition the long job, overall job throughput goes up significantly.
// However, this comes at the cost of the long job throughput.
//
// Short jobs: small zip jobs.
// Long jobs: fs.readFile on a large file.

'use strict';

const path = require('path');
const common = require('../common.js');
const filename = path.resolve(__dirname,
                              `.removeme-benchmark-garbage-${process.pid}`);
const fs = require('fs');
const zlib = require('zlib');
const assert = require('assert');

const bench = common.createBenchmark(main, {
  dur: [5],
  len: [1024, 16 * 1024 * 1024],
  concurrent: [1, 10]
});

function main(conf) {
  const len = +conf.len;
  try { fs.unlinkSync(filename); } catch (e) {}
  var data = Buffer.alloc(len, 'x');
  fs.writeFileSync(filename, data);
  data = null;

  var zipData = Buffer.alloc(1024, 'a');

  var reads = 0;
  var zips = 0;
  var benchEnded = false;
  bench.start();
  setTimeout(function() {
    const totalOps = reads + zips;
    benchEnded = true;
    bench.end(totalOps);
    try { fs.unlinkSync(filename); } catch (e) {}
  }, +conf.dur * 1000);

  function read() {
    fs.readFile(filename, afterRead);
  }

  function afterRead(er, data) {
    if (er) {
      if (er.code === 'ENOENT') {
        // Only OK if unlinked by the timer from main.
        assert.ok(benchEnded);
        return;
      }
      throw er;
    }

    if (data.length !== len)
      throw new Error('wrong number of bytes returned');

    reads++;
    if (!benchEnded)
      read();
  }

  function zip() {
    zlib.deflate(zipData, afterZip);
  }

  function afterZip(er, data) {
    if (er)
      throw er;

    zips++;
    if (!benchEnded)
      zip();
  }

  // Start reads
  var cur = +conf.concurrent;
  while (cur--) read();

  // Start a competing zip
  zip();
}
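Assuming the standard behavior of Node's benchmark harness, this file can also be run directly, with key=value arguments overriding the defaults passed to createBenchmark; an illustrative invocation:

node benchmark/fs/readfile-partitioned.js dur=5 len=16777216 concurrent=10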

benchmark/fs/readfile.js

Lines changed: 11 additions & 4 deletions

@@ -8,6 +8,7 @@ const common = require('../common.js');
 const filename = path.resolve(process.env.NODE_TMPDIR || __dirname,
                               `.removeme-benchmark-garbage-${process.pid}`);
 const fs = require('fs');
+const assert = require('assert');

 const bench = common.createBenchmark(main, {
   dur: [5],
@@ -22,10 +23,10 @@ function main({ len, dur, concurrent }) {
   data = null;

   var reads = 0;
-  var bench_ended = false;
+  var benchEnded = false;
   bench.start();
   setTimeout(function() {
-    bench_ended = true;
+    benchEnded = true;
     bench.end(reads);
     try { fs.unlinkSync(filename); } catch (e) {}
     process.exit(0);
@@ -36,14 +37,20 @@ function main({ len, dur, concurrent }) {
   }

   function afterRead(er, data) {
-    if (er)
+    if (er) {
+      if (er.code === 'ENOENT') {
+        // Only OK if unlinked by the timer from main.
+        assert.ok(benchEnded);
+        return;
+      }
       throw er;
+    }

     if (data.length !== len)
       throw new Error('wrong number of bytes returned');

     reads++;
-    if (!bench_ended)
+    if (!benchEnded)
       read();
   }

doc/api/fs.md

Lines changed: 3 additions & 4 deletions

@@ -2250,10 +2250,9 @@ Any specified file descriptor has to support reading.
 *Note*: If a file descriptor is specified as the `path`, it will not be closed
 automatically.

-*Note*: `fs.readFile()` reads the entire file in a single threadpool request.
-To minimize threadpool task length variation, prefer the partitioned APIs
-`fs.read()` and `fs.createReadStream()` when reading files as part of
-fulfilling a client request.
+*Note*: `fs.readFile()` buffers the entire file.
+To minimize memory costs, when possible prefer streaming via
+`fs.createReadStream()`.

 ## fs.readFileSync(path[, options])
 <!-- YAML
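As a usage note on the revised guidance, a stream consumes the file in chunks instead of buffering it whole. A minimal sketch; the path and the handleChunk consumer are illustrative, not from the commit:

'use strict';
const fs = require('fs');

// Hypothetical per-chunk consumer; replace with real processing.
function handleChunk(chunk) { /* use chunk incrementally */ }

fs.createReadStream('/path/to/large-file')
  .on('data', handleChunk)
  .on('error', (err) => { throw err; })
  .on('end', () => console.log('read complete'));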

lib/fs.js

Lines changed: 1 addition & 1 deletion

@@ -508,7 +508,7 @@ ReadFileContext.prototype.read = function() {
   } else {
     buffer = this.buffer;
     offset = this.pos;
-    length = this.size - this.pos;
+    length = Math.min(kReadFileBufferLength, this.size - this.pos);
   }

   var req = new FSReqWrap();
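This one-line change caps each threadpool request at kReadFileBufferLength bytes. As a back-of-envelope illustration, assuming kReadFileBufferLength is 512 KiB (the constant is defined elsewhere in lib/fs.js; its value here is an assumption):

// Before: a 16 MiB file is a single 16 MiB threadpool request.
// After: it becomes ceil(size / kReadFileBufferLength) smaller requests.
const kReadFileBufferLength = 512 * 1024;   // assumed value
const size = 16 * 1024 * 1024;              // 16 MiB file
const requests = Math.ceil(size / kReadFileBufferLength);  // 32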

test/parallel/test-fs-readfile.js

Lines changed: 58 additions & 0 deletions (new file)

'use strict';
const common = require('../common');

// This test ensures that fs.readFile correctly returns the
// contents of varying-sized files.

const assert = require('assert');
const fs = require('fs');
const path = require('path');

const prefix = `.removeme-fs-readfile-${process.pid}`;

common.refreshTmpDir();

const fileInfo = [
  { name: path.join(common.tmpDir, `${prefix}-1K.txt`),
    len: 1024,
  },
  { name: path.join(common.tmpDir, `${prefix}-64K.txt`),
    len: 64 * 1024,
  },
  { name: path.join(common.tmpDir, `${prefix}-64KLessOne.txt`),
    len: (64 * 1024) - 1,
  },
  { name: path.join(common.tmpDir, `${prefix}-1M.txt`),
    len: 1 * 1024 * 1024,
  },
  { name: path.join(common.tmpDir, `${prefix}-1MPlusOne.txt`),
    len: (1 * 1024 * 1024) + 1,
  },
];

// Populate each fileInfo (and file) with unique fill.
const sectorSize = 512;
for (const e of fileInfo) {
  e.contents = Buffer.allocUnsafe(e.len);

  // This accounts for anything unusual in Node's implementation of readFile.
  // Using e.g. 'aa...aa' would miss bugs like Node re-reading
  // the same section twice instead of two separate sections.
  for (let offset = 0; offset < e.len; offset += sectorSize) {
    const fillByte = 256 * Math.random();
    const nBytesToFill = Math.min(sectorSize, e.len - offset);
    e.contents.fill(fillByte, offset, offset + nBytesToFill);
  }

  fs.writeFileSync(e.name, e.contents);
}
// All files are now populated.

// Test readFile on each size.
for (const e of fileInfo) {
  fs.readFile(e.name, common.mustCall((err, buf) => {
    console.log(`Validating readFile on file ${e.name} of length ${e.len}`);
    assert.ifError(err, 'An error occurred');
    assert.deepStrictEqual(buf, e.contents, 'Incorrect file contents');
  }));
}
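Assuming the usual layout of the Node test suite, the test runs standalone from a built checkout, e.g. node test/parallel/test-fs-readfile.js, or through the runner with python tools/test.py parallel/test-fs-readfile.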
