Skip to content

Commit 7f63227

Browse files
fix
1 parent 1f10bdf commit 7f63227

File tree

3 files changed

+227
-22
lines changed

3 files changed

+227
-22
lines changed

src/cmap/wire_protocol/on_demand/document.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
getInt32LE,
1111
ObjectId,
1212
parseToElementsToArray,
13+
pluckBSONSerializeOptions,
1314
Timestamp,
1415
toUTF8
1516
} from '../../../bson';
@@ -330,11 +331,23 @@ export class OnDemandDocument {
330331
* @param options - BSON deserialization options
331332
*/
332333
public toObject(options?: BSONSerializeOptions): Record<string, any> {
333-
return BSON.deserialize(this.bson, {
334-
...options,
334+
const exactBSONOptions = {
335+
...pluckBSONSerializeOptions(options ?? {}),
336+
validation: this.parseBsonSerializationOptions(options),
335337
index: this.offset,
336338
allowObjectSmallerThanBufferSize: true
337-
});
339+
};
340+
return BSON.deserialize(this.bson, exactBSONOptions);
341+
}
342+
343+
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
344+
utf8: { writeErrors: false } | false;
345+
} {
346+
const enableUtf8Validation = options?.enableUtf8Validation;
347+
if (enableUtf8Validation === false) {
348+
return { utf8: false };
349+
}
350+
return { utf8: { writeErrors: false } };
338351
}
339352

340353
/** Returns this document's bytes only */

src/cmap/wire_protocol/responses.ts

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import {
55
type Document,
66
Long,
77
parseToElementsToArray,
8-
pluckBSONSerializeOptions,
98
type Timestamp
109
} from '../../bson';
1110
import { MongoUnexpectedServerResponseError } from '../../error';
@@ -166,24 +165,6 @@ export class MongoDBResponse extends OnDemandDocument {
166165
}
167166
return this.clusterTime ?? null;
168167
}
169-
170-
public override toObject(options?: BSONSerializeOptions): Record<string, any> {
171-
const exactBSONOptions = {
172-
...pluckBSONSerializeOptions(options ?? {}),
173-
validation: this.parseBsonSerializationOptions(options)
174-
};
175-
return super.toObject(exactBSONOptions);
176-
}
177-
178-
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
179-
utf8: { writeErrors: false } | false;
180-
} {
181-
const enableUtf8Validation = options?.enableUtf8Validation;
182-
if (enableUtf8Validation === false) {
183-
return { utf8: false };
184-
}
185-
return { utf8: { writeErrors: false } };
186-
}
187168
}
188169

189170
/** @internal */

test/integration/node-specific/bson-options/utf8_validation.test.ts

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import { expect } from 'chai';
2+
import * as net from 'net';
23
import * as sinon from 'sinon';
34

45
import {
56
BSON,
7+
BSONError,
8+
type Collection,
69
type MongoClient,
710
MongoDBResponse,
11+
MongoError,
812
MongoServerError,
913
OpMsgResponse
1014
} from '../../../mongodb';
@@ -153,3 +157,210 @@ describe('class MongoDBResponse', () => {
153157
}
154158
);
155159
});
160+
161+
describe('utf8 validation with cursors', function () {
162+
let client: MongoClient;
163+
let collection: Collection;
164+
165+
/**
166+
* Inserts a document with malformed UTF-8 bytes. This method stubs socket.write, and then waits
167+
* for an OP_MSG payload corresponding to `collection.insertOne({ field: 'é' })`, and then modifies the
168+
* bytes of the character 'é' to produce invalid UTF-8.
169+
*/
170+
async function insertDocumentWithInvalidUTF8() {
171+
const targetCharacter = Buffer.from('é').toString('hex');
172+
173+
const stub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...args) {
174+
const providedBuffer = args[0].toString('hex');
175+
const targetCharacter = Buffer.from('é').toString('hex');
176+
if (providedBuffer.includes(targetCharacter)) {
177+
if (providedBuffer.split(targetCharacter).length !== 2) {
178+
throw new Error('received buffer more than one `c3a9` sequences. or perhaps none?');
179+
}
180+
const buffer = Buffer.from(providedBuffer.replace('c3a9', 'c301'), 'hex');
181+
const result = stub.wrappedMethod.apply(this, [buffer]);
182+
sinon.restore();
183+
return result;
184+
}
185+
const result = stub.wrappedMethod.apply(this, args);
186+
return result;
187+
});
188+
189+
const document = {
190+
field: targetCharacter
191+
};
192+
193+
await collection.insertOne(document);
194+
195+
sinon.restore();
196+
}
197+
198+
beforeEach(async function () {
199+
client = this.configuration.newClient();
200+
await client.connect();
201+
const db = client.db('test');
202+
collection = db.collection('invalidutf');
203+
204+
await collection.deleteMany({});
205+
await insertDocumentWithInvalidUTF8();
206+
});
207+
208+
afterEach(async function () {
209+
await client.close();
210+
});
211+
212+
context('when utf-8 validation is explicitly disabled', function () {
213+
it('documents can be read using a for-await loop without errors', async function () {
214+
for await (const _doc of collection.find({}, { enableUtf8Validation: false }));
215+
});
216+
it('documents can be read using next() without errors', async function () {
217+
const cursor = collection.find({}, { enableUtf8Validation: false });
218+
219+
while (await cursor.hasNext()) {
220+
await cursor.next();
221+
}
222+
});
223+
224+
it('documents can be read using toArray() without errors', async function () {
225+
const cursor = collection.find({}, { enableUtf8Validation: false });
226+
await cursor.toArray();
227+
});
228+
229+
it('documents can be read using .stream() without errors', async function () {
230+
const cursor = collection.find({}, { enableUtf8Validation: false });
231+
await cursor.stream().toArray();
232+
});
233+
234+
it('documents can be read with tryNext() without error', async function () {
235+
const cursor = collection.find({}, { enableUtf8Validation: false });
236+
237+
while (await cursor.hasNext()) {
238+
await cursor.tryNext();
239+
}
240+
});
241+
});
242+
243+
async function expectReject(fn: () => Promise<void>, options?: { regex?: RegExp; errorClass }) {
244+
const regex = options?.regex ?? /.*/;
245+
const errorClass = options?.errorClass ?? MongoError;
246+
try {
247+
await fn();
248+
expect.fail('expected the provided callback function to reject, but it did not.');
249+
} catch (error) {
250+
expect(error).to.match(regex);
251+
expect(error).to.be.instanceOf(errorClass);
252+
}
253+
}
254+
255+
context('when utf-8 validation is explicitly enabled', function () {
256+
it('a for-await loop throw a BSON error', async function () {
257+
await expectReject(
258+
async () => {
259+
for await (const _doc of collection.find({}, { enableUtf8Validation: true }));
260+
},
261+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
262+
);
263+
});
264+
it('next() throws a BSON error', async function () {
265+
await expectReject(
266+
async () => {
267+
const cursor = collection.find({}, { enableUtf8Validation: true });
268+
269+
while (await cursor.hasNext()) {
270+
await cursor.next();
271+
}
272+
},
273+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
274+
);
275+
});
276+
277+
it('toArray() throws a BSON error', async function () {
278+
await expectReject(
279+
async () => {
280+
const cursor = collection.find({}, { enableUtf8Validation: true });
281+
await cursor.toArray();
282+
},
283+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
284+
);
285+
});
286+
287+
it('.stream() throws a BSONError', async function () {
288+
await expectReject(
289+
async () => {
290+
const cursor = collection.find({}, { enableUtf8Validation: true });
291+
await cursor.stream().toArray();
292+
},
293+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
294+
);
295+
});
296+
297+
it('tryNext() throws a BSONError', async function () {
298+
await expectReject(
299+
async () => {
300+
const cursor = collection.find({}, { enableUtf8Validation: true });
301+
302+
while (await cursor.hasNext()) {
303+
await cursor.tryNext();
304+
}
305+
},
306+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
307+
);
308+
});
309+
});
310+
311+
context('utf-8 validation defaults to enabled', function () {
312+
it('a for-await loop throw a BSON error', async function () {
313+
await expectReject(
314+
async () => {
315+
for await (const _doc of collection.find({}));
316+
},
317+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
318+
);
319+
});
320+
it('next() throws a BSON error', async function () {
321+
await expectReject(
322+
async () => {
323+
const cursor = collection.find({});
324+
325+
while (await cursor.hasNext()) {
326+
await cursor.next();
327+
}
328+
},
329+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
330+
);
331+
});
332+
333+
it('toArray() throws a BSON error', async function () {
334+
await expectReject(
335+
async () => {
336+
const cursor = collection.find({});
337+
await cursor.toArray();
338+
},
339+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
340+
);
341+
});
342+
343+
it('.stream() throws a BSONError', async function () {
344+
await expectReject(
345+
async () => {
346+
const cursor = collection.find({});
347+
await cursor.stream().toArray();
348+
},
349+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
350+
);
351+
});
352+
353+
it('tryNext() throws a BSONError', async function () {
354+
await expectReject(
355+
async () => {
356+
const cursor = collection.find({}, { enableUtf8Validation: true });
357+
358+
while (await cursor.hasNext()) {
359+
await cursor.tryNext();
360+
}
361+
},
362+
{ errorClass: BSONError, regex: /Invalid UTF-8 string in BSON document/ }
363+
);
364+
});
365+
});
366+
});

0 commit comments

Comments
 (0)