Skip to content

ujson __json__ attribute logic #12739

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions pandas/io/tests/test_json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,16 @@ def test_decodeBigEscape(self):
input = quote + (base * 1024 * 1024 * 2) + quote
output = ujson.decode(input) # noqa

def test_object_default(self):
    # An object that defines neither toDict nor __json__ is expected to be
    # serialized as an empty JSON object, which decodes to an empty dict.
    class ObjectTest:
        pass

    output = ujson.encode(ObjectTest())
    dec = ujson.decode(output)
    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias and the other tests in this file already use assertEqual.
    self.assertEqual(dec, {})

def test_toDict(self):
d = {u("key"): 31337}

Expand All @@ -853,11 +863,78 @@ class DictTest:
def toDict(self):
return d

def __json__(self):
return '"json defined"' # Fallback and shouldn't be called.

o = DictTest()
output = ujson.encode(o)
dec = ujson.decode(output)
self.assertEqual(dec, d)

def test_object_with_json(self):
    # If __json__ returns a (byte) string, that string is used verbatim as a
    # raw JSON snippet in the encoded output. Here the snippet is a quoted
    # JSON string, so decoding must give back the unquoted text.
    output_text = 'this is the correct output'

    class JSONTest:

        def __json__(self):
            return '"' + output_text + '"'

    d = {u'key': JSONTest()}
    output = ujson.encode(d)
    dec = ujson.decode(output)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dec, {u'key': output_text})

def test_object_with_json_unicode(self):
    # Same contract as test_object_with_json, but __json__ returns a unicode
    # string: it must also be accepted and used as a raw JSON snippet.
    output_text = u'this is the correct output'

    class JSONTest:

        def __json__(self):
            return u'"' + output_text + u'"'

    d = {u'key': JSONTest()}
    output = ujson.encode(d)
    dec = ujson.decode(output)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dec, {u'key': output_text})

def test_object_with_complex_json(self):
    # The raw snippet returned by __json__ may be any JSON value, not just a
    # string: here it is an encoded object containing a list, and it must
    # come back as that nested structure after a round trip.
    obj = {u'foo': [u'bar', u'baz']}

    class JSONTest:

        def __json__(self):
            return ujson.encode(obj)

    d = {u'key': JSONTest()}
    output = ujson.encode(d)
    dec = ujson.decode(output)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(dec, {u'key': obj})

def test_object_with_json_type_error(self):
    # __json__ has to hand back a string; every other return type must make
    # ujson.encode raise TypeError.
    def make_obj(result):
        # Build an object whose __json__ returns ``result`` unchanged.
        class JSONTest:
            def __json__(self):
                return result

        return JSONTest()

    for bad_value in (None, 1234, 12.34, True, {}):
        payload = {u'key': make_obj(bad_value)}
        self.assertRaises(TypeError, ujson.encode, payload)

def test_object_with_json_attribute_error(self):
    # An exception raised inside __json__ must surface from ujson.encode
    # as that same exception type.
    class RaisingJSON:
        def __json__(self):
            raise AttributeError

    self.assertRaises(AttributeError, ujson.encode, {u'key': RaisingJSON()})

def test_defaultHandler(self):

class _TestObject(object):
Expand Down Expand Up @@ -1588,6 +1665,51 @@ def test_encodeSet(self):
for v in dec:
self.assertTrue(v in s)

def test_rawJsonInDataFrame(self):
    # Objects exposing __json__ that are stored in DataFrame cells are
    # expected to appear in to_json output as nested JSON, including when
    # one raw snippet is embedded inside another.

    class ujson_as_is(object):
        # Holds an already-encoded JSON string and returns it from __json__.

        def __init__(self, value):
            self.value = value

        def __json__(self):
            return self.value

        def __eq__(self, other):
            # Compare the decoded payloads rather than the raw text.
            return ujson.loads(self.value) == ujson.loads(other.value)

        __repr__ = __json__

    def row_to_raw_json(row):
        # Encode one row as JSON and wrap it so it is emitted untouched.
        return ujson_as_is(ujson.dumps(row.to_dict()))

    df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
                   index=['a', 'b'],
                   columns=['w', 'x', 'y', 'z'])

    x_y_ser = df[['x', 'y']].apply(row_to_raw_json, axis=1)
    self.assertEqual(x_y_ser.to_dict(), {
        'a': ujson_as_is('{"y":3,"x":2}'),
        'b': ujson_as_is('{"y":7,"x":6}'),
    })

    # Nest the raw snippets one level deeper before dumping the frame.
    df['x_y'] = x_y_ser
    df['x_y_z'] = df[['x_y', 'z']].apply(row_to_raw_json, axis=1)

    df_json_dump = df[['x_y_z', 'w']].to_json(orient='records')
    expected_json = ('[{"x_y_z":{"z":4,"x_y":{"y":3,"x":2}},"w":1}'
                     ',{"x_y_z":{"z":8,"x_y":{"y":7,"x":6}},"w":5}]')

    self.assertEqual(ujson.loads(df_json_dump), ujson.loads(expected_json))


def _clean_dict(d):
    # Return a new dict with the same values as ``d`` and every key passed
    # through str().
    cleaned = {}
    for key, value in compat.iteritems(d):
        cleaned[str(key)] = value
    return cleaned
Expand Down
1 change: 1 addition & 0 deletions pandas/src/ujson/lib/ultrajson.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ enum JSTYPES
JT_LONG, //(JSINT64 (signed 64-bit))
JT_DOUBLE, //(double)
JT_UTF8, //(char 8-bit)
JT_RAW, //(raw char 8-bit) __json__ attribute
JT_ARRAY, // Array structure
JT_OBJECT, // Key/Value structure
JT_INVALID, // Internal, do not return nor expect
Expand Down
24 changes: 24 additions & 0 deletions pandas/src/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
break;
}


case JT_UTF8:
{
value = enc->getStringValue(obj, &tc, &szlen);
Expand Down Expand Up @@ -870,6 +871,29 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
Buffer_AppendCharUnchecked (enc, '\"');
break;
}

// JT_RAW: the bytes returned by getStringValue are copied into the output
// buffer exactly as they are. Unlike the JT_UTF8 case above, no surrounding
// quote characters are appended here.
case JT_RAW:
{
value = enc->getStringValue(obj, &tc, &szlen);
if(!value)
{
// NOTE(review): this path returns without calling enc->endTypeContext(),
// whereas the Buffer_Reserve failure path below does call it - confirm
// whether the type context is leaked here.
SetError(obj, enc, "utf-8 encoding error");
return;
}

// Reserve output space before the unchecked memcpy below.
Buffer_Reserve(enc, RESERVE_STRING(szlen));
if (enc->errorMsg)
{
enc->endTypeContext(obj, &tc);
return;
}

// Verbatim copy of szlen bytes; the write cursor is then advanced past them.
memcpy(enc->offset, value, szlen);
enc->offset += szlen;

break;
}

}

enc->endTypeContext(obj, &tc);
Expand Down
59 changes: 52 additions & 7 deletions pandas/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ typedef struct __TypeContext

double doubleValue;
JSINT64 longValue;
PyObject *rawJSONValue;

char *cStr;
NpyArrContext *npyarr;
Expand Down Expand Up @@ -219,6 +220,7 @@ static TypeContext* createTypeContext(void)
pc->index = 0;
pc->size = 0;
pc->longValue = 0;
pc->rawJSONValue = 0;
pc->doubleValue = 0.0;
pc->cStr = NULL;
pc->npyarr = NULL;
Expand Down Expand Up @@ -364,6 +366,17 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, si
return PyString_AS_STRING(newObj);
}

/*
 * String getter for raw JSON values.
 *
 * Ignores _obj and instead reads the object stored in the type context's
 * rawJSONValue field, then forwards it to PyUnicodeToUTF8 when it is a
 * unicode object and to PyStringToUTF8 otherwise. The helper's return value
 * is passed straight back to the caller.
 */
static void *PyRawJSONToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
  PyObject *rawValue = GET_TC(tc)->rawJSONValue;

  if (!PyUnicode_Check(rawValue))
  {
    return PyStringToUTF8(rawValue, tc, outValue, _outLen);
  }

  return PyUnicodeToUTF8(rawValue, tc, outValue, _outLen);
}

static void *PandasDateTimeStructToJSON(pandas_datetimestruct *dts, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
int base = ((PyObjectEncoder*) tc->encoder)->datetimeUnit;
Expand Down Expand Up @@ -1914,7 +1927,7 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
return;
}
else
if (PyString_Check(obj))
if (PyString_Check(obj) && !PyObject_HasAttrString(obj, "__json__"))
{
PRINTMARK();
pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8;
Expand Down Expand Up @@ -2359,10 +2372,9 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
return;
}

toDictFunc = PyObject_GetAttrString(obj, "toDict");

if (toDictFunc)
if (PyObject_HasAttrString(obj, "toDict"))
{
toDictFunc = PyObject_GetAttrString(obj, "toDict");
PyObject* tuple = PyTuple_New(0);
PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL);
Py_DECREF(tuple);
Expand All @@ -2377,9 +2389,7 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)

if (!PyDict_Check(toDictResult))
{
Py_DECREF(toDictResult);
tc->type = JT_NULL;
return;
goto INVALID;
}

PRINTMARK();
Expand All @@ -2392,6 +2402,41 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
pc->dictObj = toDictResult;
return;
}
else
if (PyObject_HasAttrString(obj, "__json__"))
{
  // obj exposes a __json__ hook: call it with no arguments and, when it
  // returns a str/unicode object, mark the value as JT_RAW so that the
  // returned text is read back through PyRawJSONToUTF8.
  //
  // Locals are declared ahead of the first statement so the block also
  // compiles under C89 rules (no declarations after code).
  PyObject* toJSONFunc;
  PyObject* toJSONResult;

  // Do not call into Python with a stale exception pending.
  PyErr_Clear();

  toJSONFunc = PyObject_GetAttrString(obj, "__json__");
  if (toJSONFunc == NULL)
  {
    // The lookup can still fail after HasAttr succeeded; the exception set
    // by the lookup is left in place for the INVALID path.
    goto INVALID;
  }

  // A NULL argument tuple means "call with no arguments", so there is no
  // temporary tuple to allocate, check and release.
  toJSONResult = PyObject_CallObject(toJSONFunc, NULL);
  Py_DECREF(toJSONFunc);

  if (toJSONResult == NULL)
  {
    // __json__ raised: keep the exception set instead of swallowing it.
    goto INVALID;
  }

  if (PyErr_Occurred())
  {
    // A result was returned although an exception is pending. Drop the
    // result but leave the exception set; printing it here would clear it.
    Py_DECREF(toJSONResult);
    goto INVALID;
  }

  if (!PyString_Check(toJSONResult) && !PyUnicode_Check(toJSONResult))
  {
    Py_DECREF(toJSONResult);
    PyErr_Format (PyExc_TypeError, "expected string");
    goto INVALID;
  }

  PRINTMARK();
  pc->PyTypeToJSON = PyRawJSONToUTF8;
  tc->type = JT_RAW;
  // The new reference is handed to the type context.
  // NOTE(review): confirm it is released when the type context ends.
  GET_TC(tc)->rawJSONValue = toJSONResult;
  return;
}

PyErr_Clear();

Expand Down