Skip to content

Commit f784469

Browse files
author
matthiashuschle
committed
BUG: to_json - prevent various segfault conditions (GH14256)
1 parent 9f0ee53 commit f784469

File tree

5 files changed

+53
-6
lines changed

5 files changed

+53
-6
lines changed

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -940,3 +940,5 @@ Other
940940
^^^^^
941941
- Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`)
942942
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
943+
- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`)
944+

pandas/_libs/src/ujson/lib/ultrajson.h

+7
Original file line numberDiff line numberDiff line change
@@ -307,4 +307,11 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec,
307307
const char *buffer, size_t cbBuffer);
308308
EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t);
309309

310+
/*
 * Buffer_Reserve(enc, len)
 *
 * Make sure the encoder's output buffer can take at least `len` more bytes.
 * The space left is the distance between (enc)->offset and (enc)->end; when
 * that is smaller than `len`, Buffer_Realloc(enc, len) is called.
 *
 * The body is wrapped in do { ... } while (0) so the expansion is exactly one
 * statement. A bare `if { }` expansion breaks when the macro is used as the
 * body of an unbraced if/else:
 *
 *     if (cond)
 *         Buffer_Reserve(enc, n);
 *     else            <- orphaned (or mis-bound) with a bare-if expansion
 *         ...
 *
 * Call sites must terminate the invocation with ';'.
 * NOTE: both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 */
#define Buffer_Reserve(__enc, __len)                                        \
    do {                                                                    \
        if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) {   \
            Buffer_Realloc((__enc), (__len));                               \
        }                                                                   \
    } while (0)
314+
315+
void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded);
316+
310317
#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_

pandas/_libs/src/ujson/lib/ultrajsonenc.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -714,11 +714,6 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc,
714714
}
715715
}
716716

717-
#define Buffer_Reserve(__enc, __len) \
718-
if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
719-
Buffer_Realloc((__enc), (__len)); \
720-
}
721-
722717
/*
 * Store __chr at the encoder's current write position and advance
 * (__enc)->offset by one. No capacity check is performed here, so the caller
 * must already have made room (e.g. via Buffer_Reserve) before invoking it.
 *
 * NOTE(review): the expansion already ends in ';' and __chr is not
 * parenthesized, so `Buffer_AppendCharUnchecked(enc, c);` yields an extra
 * empty statement -- avoid using it as the unbraced body of an if/else.
 */
#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr;
723718

724719
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
@@ -976,6 +971,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
976971
}
977972

978973
enc->iterEnd(obj, &tc);
974+
Buffer_Reserve(enc, 2);
979975
Buffer_AppendCharUnchecked(enc, ']');
980976
break;
981977
}
@@ -1003,6 +999,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
1003999
}
10041000

10051001
enc->iterEnd(obj, &tc);
1002+
Buffer_Reserve(enc, 2);
10061003
Buffer_AppendCharUnchecked(enc, '}');
10071004
break;
10081005
}

pandas/_libs/src/ujson/python/objToJSON.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,7 @@ static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen,
783783
JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
784784
PRINTMARK();
785785
*outLen = strlen(labels[idx]);
786+
Buffer_Reserve(enc, *outLen);
786787
memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen));
787788
enc->offset += *outLen;
788789
*outLen = 0;
@@ -879,7 +880,7 @@ int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) {
879880
NpyArrContext *npyarr;
880881
PRINTMARK();
881882

882-
if (PyErr_Occurred()) {
883+
if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
883884
return 0;
884885
}
885886

@@ -1224,6 +1225,10 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) {
12241225
PyObject *attrName;
12251226
char *attrStr;
12261227

1228+
if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
1229+
return 0;
1230+
}
1231+
12271232
if (itemValue) {
12281233
Py_DECREF(GET_TC(tc)->itemValue);
12291234
GET_TC(tc)->itemValue = itemValue = NULL;

pandas/tests/io/json/test_pandas.py

+36
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,42 @@ def test_blocks_compat_GH9037(self):
511511
by_blocks=True,
512512
check_exact=True)
513513

514+
def test_frame_nonprintable_bytes(self):
    """to_json must raise, not crash, on objects carrying raw bytes.

    GH14256: a frame holding an object with a non-printable ``bytes``
    attribute is expected to raise ``OverflowError`` from ``to_json``;
    supplying ``default_handler=str`` is expected to serialise it via
    ``__str__`` instead.
    """

    class BinaryThing(object):
        # Keeps the hex text and the decoded raw bytes side by side;
        # str() gives back the printable hex form.

        def __init__(self, hexed):
            self.hexed = hexed
            self.binary = (hexed.decode('hex') if compat.PY2
                           else bytes.fromhex(hexed))

        def __str__(self):
            return self.hexed

    hex_text = '574b4454ba8c5eb4f98a8f45'
    expected_error = OverflowError
    thing = BinaryThing(hex_text)

    # Baseline: the plain hex string serialises normally.
    printable_frame = DataFrame({'A': [thing.hexed]})
    assert printable_frame.to_json() == '{"A":{"0":"%s"}}' % hex_text

    # The object itself cannot be serialised without a handler.
    nonprintable_frame = DataFrame({'A': [thing]})
    with pytest.raises(expected_error):
        nonprintable_frame.to_json()

    # GH14256: a failing column used to segfault when it was not the last
    # column, so place a well-behaved column after it.
    mixed_frame = DataFrame({'A': [thing], 'B': [1]},
                            columns=['A', 'B'])
    with pytest.raises(expected_error):
        mixed_frame.to_json()

    # With default_handler the non-string object is resolved via str().
    single_json = nonprintable_frame.to_json(default_handler=str)
    assert single_json == '{"A":{"0":"%s"}}' % hex_text
    mixed_json = mixed_frame.to_json(default_handler=str)
    assert mixed_json == '{"A":{"0":"%s"},"B":{"0":1}}' % hex_text
544+
545+
def test_label_overflow(self):
546+
df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
547+
assert df.to_json() == \
548+
'{"%s":{"0":1},"foo":{"0":1337}}' % ('bar' * 100000)
549+
514550
def test_series_non_unique_index(self):
515551
s = Series(['a', 'b'], index=[1, 1])
516552

0 commit comments

Comments
 (0)