diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e0fc9d2942efd..dc48baec85a00 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -685,6 +685,93 @@ def to_dict(self): """ return dict((k, v.to_dict()) for k, v in self.iteritems()) + @classmethod + def from_json(cls, json, orient="columns", dtype=None, numpy=True): + """ + Convert JSON string to DataFrame + + Parameters + ---------- + json : The JSON string to parse. + orient : {'split', 'records', 'index', 'columns', 'values'}, + default 'columns' + The format of the JSON string + split : dict like + {index -> [index], columns -> [columns], data -> [values]} + records : list like [{column -> value}, ... , {column -> value}] + index : dict like {index -> {column -> value}} + columns : dict like {column -> {index -> value}} + values : just the values array + dtype : dtype of the resulting DataFrame + numpy : direct decoding to numpy arrays. default True but falls back + to standard decoding if a problem occurs. + + Returns + ------- + result : DataFrame + """ + from pandas._ujson import loads + df = None + + if numpy: + try: + if orient == "columns": + args = loads(json, dtype=dtype, numpy=True, labelled=True) + if args: + args = (args[0].T, args[2], args[1]) + df = DataFrame(*args) + elif orient == "split": + df = DataFrame(**loads(json, dtype=dtype, numpy=True)) + elif orient == "values": + df = DataFrame(loads(json, dtype=dtype, numpy=True)) + else: + df = DataFrame(*loads(json, dtype=dtype, numpy=True, + labelled=True)) + except ValueError: + numpy = False + if not numpy: + if orient == "columns": + df = DataFrame(loads(json), dtype=dtype) + elif orient == "split": + df = DataFrame(dtype=dtype, **loads(json)) + elif orient == "index": + df = DataFrame(loads(json), dtype=dtype).T + else: + df = DataFrame(loads(json), dtype=dtype) + + return df + + def to_json(self, orient="columns", double_precision=10, + force_ascii=True): + """ + Convert DataFrame to a JSON string. 
+ + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. + + Parameters + ---------- + orient : {'split', 'records', 'index', 'columns', 'values'}, + default 'columns' + The format of the JSON string + split : dict like + {index -> [index], columns -> [columns], data -> [values]} + records : list like [{column -> value}, ... , {column -> value}] + index : dict like {index -> {column -> value}} + columns : dict like {column -> {index -> value}} + values : just the values array + double_precision : The number of decimal places to use when encoding + floating point values, default 10. + force_ascii : force encoded string to be ASCII, default True. + + Returns + ------- + result : JSON compatible string + """ + from pandas._ujson import dumps + return dumps(self, orient=orient, double_precision=double_precision, + ensure_ascii=force_ascii) + @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, names=None, coerce_float=False): diff --git a/pandas/core/series.py b/pandas/core/series.py index 221972269423c..f1848268369bb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -919,6 +919,77 @@ def to_dict(self): """ return dict(self.iteritems()) + @classmethod + def from_json(cls, json, orient="index", dtype=None, numpy=True): + """ + Convert JSON string to Series + + Parameters + ---------- + json : The JSON string to parse. + orient : {'split', 'records', 'index'}, default 'index' + The format of the JSON string + split : dict like + {index -> [index], name -> name, data -> [values]} + records : list like [value, ... , value] + index : dict like {index -> value} + dtype : dtype of the resulting Series + numpy : direct decoding to numpy arrays. default True but falls back + to standard decoding if a problem occurs. 
+ + Returns + ------- + result : Series + """ + from pandas._ujson import loads + s = None + + if numpy: + try: + if orient == "split": + s = Series(**loads(json, dtype=dtype, numpy=True)) + elif orient == "columns" or orient == "index": + s = Series(*loads(json, dtype=dtype, numpy=True, + labelled=True)) + else: + s = Series(loads(json, dtype=dtype, numpy=True)) + except ValueError: + numpy = False + if not numpy: + if orient == "split": + s = Series(dtype=dtype, **loads(json)) + else: + s = Series(loads(json), dtype=dtype) + + return s + + def to_json(self, orient="index", double_precision=10, force_ascii=True): + """ + Convert Series to a JSON string + + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. + + Parameters + ---------- + orient : {'split', 'records', 'index'}, default 'index' + The format of the JSON string + split : dict like + {index -> [index], name -> name, data -> [values]} + records : list like [value, ... , value] + index : dict like {index -> value} + double_precision : The number of decimal places to use when encoding + floating point values, default 10. + force_ascii : force encoded string to be ASCII, default True. + + Returns + ------- + result : JSON compatible string + """ + from pandas._ujson import dumps + return dumps(self, orient=orient, double_precision=double_precision, + ensure_ascii=force_ascii) + def to_sparse(self, kind='block', fill_value=None): """ Convert Series to SparseSeries diff --git a/pandas/src/ujson/lib/ultrajson.h b/pandas/src/ujson/lib/ultrajson.h new file mode 100644 index 0000000000000..0514236e750e1 --- /dev/null +++ b/pandas/src/ujson/lib/ultrajson.h @@ -0,0 +1,301 @@ +/* +Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by ESN Social Software AB (www.esn.me). +4. Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Portions of code from: +MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +*/ + +/* +Ultra fast JSON encoder and decoder +Developed by Jonas Tarnstrom (jonas@esn.me). + +Encoder notes: +------------------ + +:: Cyclic references :: +Cyclic referenced objects are not detected. 
+Set JSONObjectEncoder.recursionMax to suitable value or make sure input object +tree doesn't have cyclic references. + +*/ + +#ifndef __ULTRAJSON_H__ +#define __ULTRAJSON_H__ + +#include +#include + +//#define JSON_DECODE_NUMERIC_AS_DOUBLE + +// Don't output any extra whitespaces when encoding +#define JSON_NO_EXTRA_WHITESPACE + +// Max decimals to encode double floating point numbers with +#ifndef JSON_DOUBLE_MAX_DECIMALS +#define JSON_DOUBLE_MAX_DECIMALS 15 +#endif + +// Max recursion depth, default for encoder +#ifndef JSON_MAX_RECURSION_DEPTH +#define JSON_MAX_RECURSION_DEPTH 1024 +#endif + +/* +Dictates and limits how much stack space for buffers UltraJSON will use before resorting to provided heap functions */ +#ifndef JSON_MAX_STACK_BUFFER_SIZE +#define JSON_MAX_STACK_BUFFER_SIZE 131072 +#endif + +#ifdef _WIN32 + +typedef __int64 JSINT64; +typedef unsigned __int64 JSUINT64; + +typedef unsigned __int32 uint32_t; +typedef __int32 JSINT32; +typedef uint32_t JSUINT32; +typedef unsigned __int8 JSUINT8; +typedef unsigned __int16 JSUTF16; +typedef unsigned __int32 JSUTF32; +typedef __int64 JSLONG; + +#define EXPORTFUNCTION __declspec(dllexport) + +#define FASTCALL_MSVC __fastcall +#define FASTCALL_ATTR +#define INLINE_PREFIX __inline + +#else + +#include +typedef int64_t JSINT64; +typedef u_int64_t JSUINT64; + +typedef int32_t JSINT32; +typedef u_int32_t JSUINT32; + +#define FASTCALL_MSVC +#define FASTCALL_ATTR __attribute__((fastcall)) +#define INLINE_PREFIX inline + +typedef u_int32_t uint32_t; + +typedef u_int8_t JSUINT8; +typedef u_int16_t JSUTF16; +typedef u_int32_t JSUTF32; + +typedef int64_t JSLONG; + +#define EXPORTFUNCTION +#endif + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define __LITTLE_ENDIAN__ +#else + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define __BIG_ENDIAN__ +#endif + +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#error "Endianess not supported" +#endif + +enum JSTYPES +{ + JT_NULL, // NULL + JT_TRUE, 
//boolean true + JT_FALSE, //boolean false + JT_INT, //(JSINT32 (signed 32-bit)) + JT_LONG, //(JSINT64 (signed 64-bit)) + JT_DOUBLE, //(double) + JT_UTF8, //(char 8-bit) + JT_ARRAY, // Array structure + JT_OBJECT, // Key/Value structure + JT_INVALID, // Internal, do not return nor expect +}; + +typedef void * JSOBJ; +typedef void * JSITER; + +typedef struct __JSONTypeContext +{ + int type; + void *encoder; + void *prv[32]; +} JSONTypeContext; + +/* +Function pointer declarations, suitable for implementing UltraJSON */ +typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc); +typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc); +typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc); +typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc); +typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc, size_t *outLen); +typedef void *(*JSPFN_MALLOC)(size_t size); +typedef void (*JSPFN_FREE)(void *pptr); +typedef void *(*JSPFN_REALLOC)(void *base, size_t size); + +typedef struct __JSONObjectEncoder +{ + void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); + JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc); + double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + + /* + Begin iteration of an iteratable object (JS_ARRAY or JS_OBJECT) + Implementor should setup iteration state in ti->prv + */ + JSPFN_ITERBEGIN iterBegin; + + /* + Retrieve next object in an iteration. Should return 0 to indicate iteration has reached end or 1 if there are more items. + Implementor is responsible for keeping state of the iteration. Use ti->prv fields for this + */ + JSPFN_ITERNEXT iterNext; + + /* + Ends the iteration of an iteratable object. 
+ Any iteration state stored in ti->prv can be freed here + */ + JSPFN_ITEREND iterEnd; + + /* + Returns a reference to the value object of an iterator + The implementor is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETVALUE iterGetValue; + + /* + Return name of iterator. + The implementor is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and ti->prv to keep track of current object + */ + JSPFN_ITERGETNAME iterGetName; + + /* + Release a value as indicated by setting ti->release = 1 in the previous getValue call. + The ti->prv array should contain the necessary context to release the value + */ + void (*releaseObject)(JSOBJ obj); + + /* Library functions + Set to NULL to use STDLIB malloc,realloc,free */ + JSPFN_MALLOC malloc; + JSPFN_REALLOC realloc; + JSPFN_FREE free; + + /* + Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/ + int recursionMax; + + /* + Configuration for max decimals of double floating point numbers to encode (0-9) */ + int doublePrecision; + + /* + If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or whatever charset strings are brought as */ + int forceASCII; + + + /* + Set to an error message if error occurred */ + const char *errorMsg; + JSOBJ errorObj; + + /* Buffer stuff */ + char *start; + char *offset; + char *end; + int heap; + int level; + +} JSONObjectEncoder; + + +/* +Encode an object structure into JSON. + +Arguments: +obj - An anonymous type representing the object +enc - Function definitions for querying JSOBJ type +buffer - Preallocated buffer to store result in. If NULL function allocates own buffer +cbBuffer - Length of buffer (ignored if buffer is NULL) + +Returns: +Encoded JSON object as a null terminated char string. + +NOTE: +If the supplied buffer wasn't enough to hold the result the function will allocate a new buffer. 
+Life cycle of the provided buffer must still be handled by caller. + +If the return value doesn't equal the specified buffer caller must release the memory using +JSONObjectEncoder.free or free() as specified when calling this function. +*/ +EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *buffer, size_t cbBuffer); + + + +typedef struct __JSONObjectDecoder +{ + JSOBJ (*newString)(wchar_t *start, wchar_t *end); + int (*objectAddKey)(JSOBJ obj, JSOBJ name, JSOBJ value); + int (*arrayAddItem)(JSOBJ obj, JSOBJ value); + JSOBJ (*newTrue)(void); + JSOBJ (*newFalse)(void); + JSOBJ (*newNull)(void); + JSOBJ (*newObject)(void *decoder); + JSOBJ (*endObject)(JSOBJ obj); + JSOBJ (*newArray)(void *decoder); + JSOBJ (*endArray)(JSOBJ obj); + JSOBJ (*newInt)(JSINT32 value); + JSOBJ (*newLong)(JSINT64 value); + JSOBJ (*newDouble)(double value); + void (*releaseObject)(JSOBJ obj, void *decoder); + JSPFN_MALLOC malloc; + JSPFN_FREE free; + JSPFN_REALLOC realloc; + + char *errorStr; + char *errorOffset; + + + +} JSONObjectDecoder; + +EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); + +#endif diff --git a/pandas/src/ujson/lib/ultrajsondec.c b/pandas/src/ujson/lib/ultrajsondec.c new file mode 100644 index 0000000000000..591122be82f92 --- /dev/null +++ b/pandas/src/ujson/lib/ultrajsondec.c @@ -0,0 +1,837 @@ +/* +Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. 
All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by ESN Social Software AB (www.esn.me). +4. Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Portions of code from: +MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. 
+ +*/ + +#include "ultrajson.h" +#include +#include +#include +#include +#include + +struct DecoderState +{ + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSONObjectDecoder *dec; +}; + +JSOBJ FASTCALL_MSVC decode_any( struct DecoderState *ds) FASTCALL_ATTR; +typedef JSOBJ (*PFN_DECODER)( struct DecoderState *ds); +#define RETURN_JSOBJ_NULLCHECK(_expr) return(_expr); + +double createDouble(double intNeg, double intValue, double frcValue, int frcDecimalCount) +{ + static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000}; + + return (intValue + (frcValue / g_pow10[frcDecimalCount])) * intNeg; +} + +static JSOBJ SetError( struct DecoderState *ds, int offset, const char *message) +{ + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *) message; + return NULL; +} + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric ( struct DecoderState *ds) +{ +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE + double intNeg = 1; + double intValue; +#else + int intNeg = 1; + JSLONG intValue; +#endif + + double expNeg; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expValue; + char *offset = ds->start; + + if (*(offset) == '-') + { + offset ++; + intNeg = -1; + } + + // Scan integer part + intValue = 0; + + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + //FIXME: Check for arithemtic overflow here + //PERF: Don't do 64-bit arithmetic here unless we know we have to +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE + intValue = intValue * 10.0 + (double) (chr - 48); +#else + intValue = intValue * 10LL + (JSLONG) (chr - 48); +#endif + offset ++; + break; + + case '.': + offset ++; + goto DECODE_FRACTION; + break; + + case 'e': + 
case 'E': + offset ++; + goto DECODE_EXPONENT; + break; + + default: + goto BREAK_INT_LOOP; + break; + } + } + +BREAK_INT_LOOP: + + ds->lastType = JT_INT; + ds->start = offset; + + //If input string is LONGLONG_MIN here the value is already negative so we should not flip it + +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE +#else + if (intValue < 0) + { + intNeg = 1; + } +#endif + + //dbg1 = (intValue * intNeg); + //dbg2 = (JSLONG) dbg1; + +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE + if (intValue > (double) INT_MAX || intValue < (double) INT_MIN) +#else + if ( (intValue >> 31)) +#endif + { + RETURN_JSOBJ_NULLCHECK(ds->dec->newLong( (JSINT64) (intValue * (JSINT64) intNeg))); + } + else + { + RETURN_JSOBJ_NULLCHECK(ds->dec->newInt( (JSINT32) (intValue * intNeg))); + } + + + +DECODE_FRACTION: + + // Scan fraction part + frcValue = 0.0; + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) + { + frcValue = frcValue * 10.0 + (double) (chr - 48); + decimalCount ++; + } + offset ++; + break; + + case 'e': + case 'E': + offset ++; + goto DECODE_EXPONENT; + break; + + default: + goto BREAK_FRC_LOOP; + } + } + +BREAK_FRC_LOOP: + + if (intValue < 0) + { + intNeg = 1; + } + + //FIXME: Check for arithemtic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue, frcValue, decimalCount))); + +DECODE_EXPONENT: + expNeg = 1.0; + + if (*(offset) == '-') + { + expNeg = -1.0; + offset ++; + } + else + if (*(offset) == '+') + { + expNeg = +1.0; + offset ++; + } + + expValue = 0.0; + + while (1) + { + chr = (int) (unsigned char) *(offset); + + switch (chr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + expValue = expValue * 10.0 + 
(double) (chr - 48); + offset ++; + break; + + default: + goto BREAK_EXP_LOOP; + + } + } + +BREAK_EXP_LOOP: + +#ifdef JSON_DECODE_NUMERIC_AS_DOUBLE +#else + if (intValue < 0) + { + intNeg = 1; + } +#endif + + //FIXME: Check for arithemtic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newDouble (createDouble( (double) intNeg, (double) intValue , frcValue, decimalCount) * pow(10.0, expValue * expNeg))); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds) +{ + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'r') + goto SETERROR; + if (*(offset++) != 'u') + goto SETERROR; + if (*(offset++) != 'e') + goto SETERROR; + + ds->lastType = JT_TRUE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newTrue()); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_false ( struct DecoderState *ds) +{ + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'a') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + if (*(offset++) != 's') + goto SETERROR; + if (*(offset++) != 'e') + goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newFalse()); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); + +} + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_null ( struct DecoderState *ds) +{ + char *offset = ds->start; + offset ++; + + if (*(offset++) != 'u') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + if (*(offset++) != 'l') + goto SETERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + RETURN_JSOBJ_NULLCHECK(ds->dec->newNull()); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); +} + +FASTCALL_ATTR void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) +{ + char *offset = ds->start; + + while (1) + { + switch (*offset) + { + case ' ': + 
case '\t': + case '\r': + case '\n': + offset ++; + break; + + default: + ds->start = offset; + return; + } + } +} + + +enum DECODESTRINGSTATE +{ + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, + +}; + +static const JSUINT8 g_decoderLookup[256] = +{ +/* 0x00 */ DS_ISNULL, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x20 */ 1, 1, DS_ISQUOTE, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, DS_ISESCAPE, 1, 1, 1, +/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, DS_UTFLENERROR, +}; + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string ( struct DecoderState *ds) +{ + JSUTF16 sur[2] = { 0 }; + int iSur = 0; + int index; + wchar_t *escOffset; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start ++; + + if ( (ds->end - ds->start) > escLen) + { + size_t newSize = (ds->end - ds->start); + + if (ds->escHeap) + { + ds->escStart = (wchar_t *) ds->dec->realloc (ds->escStart, newSize * sizeof(wchar_t)); + } + else + { + wchar_t *oldStart = ds->escStart; + ds->escHeap = 1; + ds->escStart = (wchar_t *) ds->dec->malloc (newSize * 
sizeof(wchar_t)); + memcpy (ds->escStart, oldStart, escLen * sizeof(wchar_t)); + } + + ds->escEnd = ds->escStart + newSize; + } + + escOffset = ds->escStart; + inputOffset = ds->start; + + while(1) + { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) + { + case DS_ISNULL: + return SetError(ds, -1, "Unmatched ''\"' when when decoding 'string'"); + + case DS_ISQUOTE: + ds->lastType = JT_UTF8; + inputOffset ++; + ds->start += ( (char *) inputOffset - (ds->start)); + RETURN_JSOBJ_NULLCHECK(ds->dec->newString(ds->escStart, escOffset)); + + case DS_UTFLENERROR: + return SetError (ds, -1, "Invalid UTF-8 sequence length when decoding 'string'"); + + case DS_ISESCAPE: + inputOffset ++; + switch (*inputOffset) + { + case '\\': *(escOffset++) = L'\\'; inputOffset++; continue; + case '\"': *(escOffset++) = L'\"'; inputOffset++; continue; + case '/': *(escOffset++) = L'/'; inputOffset++; continue; + case 'b': *(escOffset++) = L'\b'; inputOffset++; continue; + case 'f': *(escOffset++) = L'\f'; inputOffset++; continue; + case 'n': *(escOffset++) = L'\n'; inputOffset++; continue; + case 'r': *(escOffset++) = L'\r'; inputOffset++; continue; + case 't': *(escOffset++) = L'\t'; inputOffset++; continue; + + case 'u': + { + int index; + inputOffset ++; + + for (index = 0; index < 4; index ++) + { + switch (*inputOffset) + { + case '\0': return SetError (ds, -1, "Unterminated unicode escape sequence when decoding 'string'"); + default: return SetError (ds, -1, "Unexpected character in unicode escape sequence when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + (JSUTF16) (*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + (JSUTF16) (*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 
+ (JSUTF16) (*inputOffset - 'A'); + break; + } + + inputOffset ++; + } + + + if (iSur == 0) + { + if((sur[iSur] & 0xfc00) == 0xd800) + { + // First of a surrogate pair, continue parsing + iSur ++; + break; + } + (*escOffset++) = (wchar_t) sur[iSur]; + iSur = 0; + } + else + { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) + { + return SetError (ds, -1, "Unpaired high surrogate when decoding 'string'"); + } + +#if WCHAR_MAX == 0xffff + (*escOffset++) = (wchar_t) sur[0]; + (*escOffset++) = (wchar_t) sur[1]; +#else + (*escOffset++) = (wchar_t) 0x10000 + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); +#endif + iSur = 0; + } + break; + } + + case '\0': return SetError(ds, -1, "Unterminated escape sequence when decoding 'string'"); + default: return SetError(ds, -1, "Unrecognized escape sequence when decoding 'string'"); + } + break; + + case 1: + *(escOffset++) = (wchar_t) (*inputOffset++); + break; + + case 2: + { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) return SetError (ds, -1, "Overlong 2 byte UTF-8 sequence detected when decoding 'string'"); + *(escOffset++) = (wchar_t) ucs; + break; + } + + case 3: + { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index ++) + { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 sequence when decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x800) return SetError (ds, -1, "Overlong 3 byte UTF-8 sequence detected when encoding string"); + *(escOffset++) = (wchar_t) ucs; + break; + } + + case 4: + { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; + + for (index = 0; index < 3; index ++) + { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) + { + return SetError(ds, -1, "Invalid octet in UTF-8 
sequence when decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x10000) return SetError (ds, -1, "Overlong 4 byte UTF-8 sequence detected when decoding 'string'"); + + #if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) + { + ucs -= 0x10000; + *(escOffset++) = (ucs >> 10) + 0xd800; + *(escOffset++) = (ucs & 0x3ff) + 0xdc00; + } + else + { + *(escOffset++) = (wchar_t) ucs; + } + #else + *(escOffset++) = (wchar_t) ucs; + #endif + break; + } + } + } +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array( struct DecoderState *ds) +{ + JSOBJ itemValue; + JSOBJ newObj = ds->dec->newArray(ds->dec); + + ds->lastType = JT_INVALID; + ds->start ++; + + while (1)//(*ds->start) != '\0') + { + SkipWhitespace(ds); + + if ((*ds->start) == ']') + { + ds->start++; + return ds->dec->endArray(newObj); + } + + itemValue = decode_any(ds); + + if (itemValue == NULL) + { + ds->dec->releaseObject(newObj, ds->dec); + return NULL; + } + + if (!ds->dec->arrayAddItem (newObj, itemValue)) + { + ds->dec->releaseObject(newObj, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) + { + case ']': + return ds->dec->endArray(newObj); + + case ',': + break; + + default: + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unexpected character in found when decoding array value"); + } + } + + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unmatched ']' when decoding 'array'"); +} + + + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object( struct DecoderState *ds) +{ + JSOBJ itemName; + JSOBJ itemValue; + JSOBJ newObj = ds->dec->newObject(ds->dec); + + ds->start ++; + + while (1) + { + SkipWhitespace(ds); + + if ((*ds->start) == '}') + { + ds->start ++; + return ds->dec->endObject(newObj); + } + + ds->lastType = JT_INVALID; + itemName = decode_any(ds); + + if (itemName == NULL) + { + ds->dec->releaseObject(newObj, ds->dec); + return NULL; + } + + if (ds->lastType != JT_UTF8) + { + ds->dec->releaseObject(newObj, ds->dec); + 
ds->dec->releaseObject(itemName, ds->dec); + return SetError(ds, -1, "Key name of object must be 'string' when decoding 'object'"); + } + + SkipWhitespace(ds); + + if (*(ds->start++) != ':') + { + ds->dec->releaseObject(newObj, ds->dec); + ds->dec->releaseObject(itemName, ds->dec); + return SetError(ds, -1, "No ':' found when decoding object value"); + } + + SkipWhitespace(ds); + + itemValue = decode_any(ds); + + if (itemValue == NULL) + { + ds->dec->releaseObject(newObj, ds->dec); + ds->dec->releaseObject(itemName, ds->dec); + return NULL; + } + + if (!ds->dec->objectAddKey (newObj, itemName, itemValue)) + { + ds->dec->releaseObject(newObj, ds->dec); + ds->dec->releaseObject(itemName, ds->dec); + ds->dec->releaseObject(itemValue, ds->dec); + return NULL; + } + + SkipWhitespace(ds); + + switch (*(ds->start++)) + { + case '}': + return ds->dec->endObject(newObj); + + case ',': + break; + + default: + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unexpected character in found when decoding object value"); + } + } + + ds->dec->releaseObject(newObj, ds->dec); + return SetError(ds, -1, "Unmatched '}' when decoding object"); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) +{ + while (1) + { + switch (*ds->start) + { + case '\"': + return decode_string (ds); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + return decode_numeric (ds); + + case '[': return decode_array (ds); + case '{': return decode_object (ds); + case 't': return decode_true (ds); + case 'f': return decode_false (ds); + case 'n': return decode_null (ds); + + case ' ': + case '\t': + case '\r': + case '\n': + // White space + ds->start ++; + break; + + default: + return SetError(ds, -1, "Expected object or value"); + } + } +} + + +JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer) +{ + + /* + FIXME: Base the size of escBuffer of that of 
cbBuffer so that the unicode escaping doesn't run into the wall each time */ + struct DecoderState ds; + wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))]; + JSOBJ ret; + + ds.start = (char *) buffer; + ds.end = ds.start + cbBuffer; + + ds.escStart = escBuffer; + ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t)); + ds.escHeap = 0; + ds.dec = dec; + ds.dec->errorStr = NULL; + ds.dec->errorOffset = NULL; + + ds.dec = dec; + + ret = decode_any (&ds); + + if (ds.escHeap) + { + dec->free(ds.escStart); + } + return ret; +} diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/src/ujson/lib/ultrajsonenc.c new file mode 100644 index 0000000000000..6fb25c926c431 --- /dev/null +++ b/pandas/src/ujson/lib/ultrajsonenc.c @@ -0,0 +1,858 @@ +/* +Copyright (c) 2011, Jonas Tarnstrom and ESN Social Software AB +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by ESN Social Software AB (www.esn.me). +4. Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY ESN SOCIAL SOFTWARE AB ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Portions of code from: +MODP_ASCII - Ascii transformations (upper/lower, etc) +http://code.google.com/p/stringencoders/ +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +*/ + +#include "ultrajson.h" +#include +#include +#include +#include +#include + +#include + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000}; +static const char g_hexChars[] = "0123456789abcdef"; +static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; + + +/* +FIXME: While this is fine dandy and working it's a magic value mess which probably only the author understands. 
+Needs a cleanup and more documentation */ + +/* +Table for pure ascii output escaping all characters above 127 to \uXXXX */ +static const JSUINT8 g_asciiOutputTable[256] = +{ +/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30, +/* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, +/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, +/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1, +/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + + +static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message) +{ + enc->errorMsg = message; + enc->errorObj = obj; +} + +/* +FIXME: Keep track of how big these get across several encoder calls and try to make an estimate +That way we won't run our head into the wall each call */ +void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded) +{ + size_t curSize = enc->end - enc->start; + size_t newSize = curSize * 2; + size_t offset = enc->offset - enc->start; + + while (newSize < curSize + cbNeeded) + { + newSize *= 2; + } + + if (enc->heap) + { + enc->start = (char *) enc->realloc (enc->start, newSize); + } + else + { + char *oldStart = enc->start; + enc->heap = 1; + enc->start = (char *) enc->malloc (newSize); + memcpy (enc->start, oldStart, offset); + } + enc->offset = enc->start + offset; + 
enc->end = enc->start + newSize; +} + +FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (char *outputOffset, unsigned short value) +{ + *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12]; + *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8]; + *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4]; + *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0]; +} + +int Buffer_EscapeStringUnvalidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) +{ + char *of = (char *) enc->offset; + + while (1) + { + switch (*io) + { + case 0x00: + if (io < end) + { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + break; + } + else + { + enc->offset += (of - enc->offset); + return TRUE; + } + + case '\"': (*of++) = '\\'; (*of++) = '\"'; break; + case '\\': (*of++) = '\\'; (*of++) = '\\'; break; + case '/': (*of++) = '\\'; (*of++) = '/'; break; + case '\b': (*of++) = '\\'; (*of++) = 'b'; break; + case '\f': (*of++) = '\\'; (*of++) = 'f'; break; + case '\n': (*of++) = '\\'; (*of++) = 'n'; break; + case '\r': (*of++) = '\\'; (*of++) = 'r'; break; + case '\t': (*of++) = '\\'; (*of++) = 't'; break; + + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; + break; + + default: (*of++) = (*io); break; + } + + io++; + } + + return FALSE; +} + + +/* +FIXME: +This code only works with Little and Big Endian + +FIXME: The JSON spec says escape "/" but non of the others do and we don't +want to be left alone doing it so we 
don't :) + +*/ +int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end) +{ + JSUTF32 ucs; + char *of = (char *) enc->offset; + + while (1) + { + + //JSUINT8 chr = (unsigned char) *io; + JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io]; + + switch (utflen) + { + case 0: + { + if (io < end) + { + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = '0'; + io ++; + continue; + } + else + { + enc->offset += (of - enc->offset); + return TRUE; + } + } + + case 1: + { + *(of++)= (*io++); + continue; + } + + case 2: + { + JSUTF32 in; + + if (io + 1 > end) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + + in = *((JSUTF16 *) io); + +#ifdef __LITTLE_ENDIAN__ + ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f); +#else + ucs = ((in & 0x1f00) >> 2) | (in & 0x3f); +#endif + + if (ucs < 0x80) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 2 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 2; + break; + } + + case 3: + { + JSUTF32 in; + + if (io + 2 > end) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when encoding string"); + return FALSE; + } + +#ifdef __LITTLE_ENDIAN__ + in = *((JSUTF16 *) io); + in |= *((JSUINT8 *) io + 2) << 16; + ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) | ((in & 0x3f0000) >> 16); +#else + in = *((JSUTF16 *) io) << 8; + in |= *((JSUINT8 *) io + 2); + ucs = ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + + + if (ucs < 0x800) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 3 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 3; + break; + } + case 4: + { + JSUTF32 in; + + if (io + 3 > end) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unterminated UTF-8 sequence when 
encoding string"); + return FALSE; + } + +#ifdef __LITTLE_ENDIAN__ + in = *((JSUTF32 *) io); + ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) | ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24); +#else + in = *((JSUTF32 *) io); + ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f); +#endif + if (ucs < 0x10000) + { + enc->offset += (of - enc->offset); + SetError (obj, enc, "Overlong 4 byte UTF-8 sequence detected when encoding string"); + return FALSE; + } + + io += 4; + break; + } + + + case 5: + case 6: + enc->offset += (of - enc->offset); + SetError (obj, enc, "Unsupported UTF-8 sequence length when encoding string"); + return FALSE; + + case 30: + // \uXXXX encode + *(of++) = '\\'; + *(of++) = 'u'; + *(of++) = '0'; + *(of++) = '0'; + *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)]; + *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)]; + io ++; + continue; + + case 10: + case 12: + case 14: + case 16: + case 18: + case 20: + case 22: + case 24: + *(of++) = *( (char *) (g_escapeChars + utflen + 0)); + *(of++) = *( (char *) (g_escapeChars + utflen + 1)); + io ++; + continue; + } + + /* + If the character is a UTF8 sequence of length > 1 we end up here */ + if (ucs >= 0x10000) + { + ucs -= 0x10000; + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (ucs >> 10) + 0xd800); + of += 4; + + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, (ucs & 0x3ff) + 0xdc00); + of += 4; + } + else + { + *(of++) = '\\'; + *(of++) = 'u'; + Buffer_AppendShortHexUnchecked(of, ucs); + of += 4; + } + } + + return FALSE; +} + +#define Buffer_Reserve(__enc, __len) \ + if ((__enc)->offset + (__len) > (__enc)->end) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + + +#define Buffer_AppendCharUnchecked(__enc, __chr) \ + *((__enc)->offset++) = __chr; \ + +FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end) +{ + char aux; + while (end > begin) + aux = *end, 
*end-- = *begin, *begin++ = aux; +} + +void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) +{ + char* wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. + + do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset,wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) +{ + char* wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. + + do *wstr++ = (char)(48 + (uvalue % 10ULL)); while(uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset,wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, double value) +{ + /* if input is larger than thres_max, revert to exponential */ + const double thres_max = (double) 1e16 - 1; + int count; + double diff = 0.0; + char* str = enc->offset; + char* wstr = str; + unsigned long long whole; + double tmp; + unsigned long long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) + { + SetError (obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } + if (! (value == value)) + { + SetError (obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) + { + neg = 1; + value = -value; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long long) value; + tmp = (value - whole) * pow10; + frac = (unsigned long long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) + { + ++frac; + /* handle rollover, e.g. 
case 0.99 with prec 1 is 1.0 */ + if (frac >= pow10) + { + frac = 0; + ++whole; + } + } + else + if (diff == 0.5 && ((frac == 0) || (frac & 1))) + { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + /* for very large numbers switch back to native sprintf for exponentials. + anyone want to write code to replace this? */ + /* + normal printf behavior is to print EVERY whole number digit + which can be 100s of characters overflowing your buffers == bad + */ + if (value > thres_max) + { + enc->offset += sprintf(str, "%.15e", neg ? -value : value); + return TRUE; + } + + if (enc->doublePrecision == 0) + { + diff = value - whole; + + if (diff > 0.5) + { + /* greater than 0.5, round up, e.g. 1.6 -> 2 */ + ++whole; + } + else + if (diff == 0.5 && (whole & 1)) + { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + //vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } + else + if (frac) + { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) + { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do + { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) + { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; + } + else + { + *wstr++ = '0'; + *wstr++ = '.'; + } + + // do whole part + // Take care of sign + // Conversion. Number is reversed. 
+ do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10); + + if (neg) + { + *wstr++ = '-'; + } + strreverse(str, wstr-1); + enc->offset += (wstr - (enc->offset)); + + return TRUE; +} + + + + + + +/* +FIXME: +Handle integration functions returning NULL here */ + +/* +FIXME: +Perhaps implement recursion detection */ + +void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName) +{ + JSONTypeContext tc; + tc.encoder = enc; + size_t szlen; + + if (enc->level > enc->recursionMax) + { + SetError (obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + + Since input is assumed to be UTF-8 the worst character length is: + + 4 bytes (of UTF-8) => "\uXXXX\uXXXX" (12 bytes) + */ + + Buffer_Reserve(enc, 256 + (((cbName / 4) + 1) * 12)); + + if (name) + { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) + { + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) + { + return; + } + } + else + { + if (!Buffer_EscapeStringUnvalidated(obj, enc, name, name + cbName)) + { + return; + } + } + + + Buffer_AppendCharUnchecked(enc, '\"'); + + Buffer_AppendCharUnchecked (enc, ':'); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked (enc, ' '); +#endif + } + + enc->beginTypeContext(obj, &tc); + + switch (tc.type) + { + case JT_INVALID: + return; + + case JT_ARRAY: + { + int count = 0; + JSOBJ iterObj; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked (enc, '['); + + while (enc->iterNext(obj, &tc)) + { + if (count > 0) + { + Buffer_AppendCharUnchecked (enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked (buffer, ' '); +#endif + } + + iterObj = enc->iterGetValue(obj, &tc); + + enc->level ++; + encode (iterObj, enc, NULL, 0); + count ++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendCharUnchecked (enc, ']'); + break; + } + + case JT_OBJECT: + { + 
int count = 0; + JSOBJ iterObj; + char *objName; + + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked (enc, '{'); + + while (enc->iterNext(obj, &tc)) + { + if (count > 0) + { + Buffer_AppendCharUnchecked (enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked (enc, ' '); +#endif + } + + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); + + enc->level ++; + encode (iterObj, enc, objName, szlen); + count ++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendCharUnchecked (enc, '}'); + break; + } + + case JT_LONG: + { + Buffer_AppendLongUnchecked (enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: + { + Buffer_AppendIntUnchecked (enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: + { + Buffer_AppendCharUnchecked (enc, 't'); + Buffer_AppendCharUnchecked (enc, 'r'); + Buffer_AppendCharUnchecked (enc, 'u'); + Buffer_AppendCharUnchecked (enc, 'e'); + break; + } + + case JT_FALSE: + { + Buffer_AppendCharUnchecked (enc, 'f'); + Buffer_AppendCharUnchecked (enc, 'a'); + Buffer_AppendCharUnchecked (enc, 'l'); + Buffer_AppendCharUnchecked (enc, 's'); + Buffer_AppendCharUnchecked (enc, 'e'); + break; + } + + + case JT_NULL: + { + Buffer_AppendCharUnchecked (enc, 'n'); + Buffer_AppendCharUnchecked (enc, 'u'); + Buffer_AppendCharUnchecked (enc, 'l'); + Buffer_AppendCharUnchecked (enc, 'l'); + break; + } + + case JT_DOUBLE: + { + if (!Buffer_AppendDoubleUnchecked (obj, enc, enc->getDoubleValue(obj, &tc))) + { + enc->endTypeContext(obj, &tc); + enc->level --; + return; + } + break; + } + + case JT_UTF8: + { + const char *value = enc->getStringValue(obj, &tc, &szlen); + Buffer_Reserve(enc, ((szlen / 4) + 1) * 12); + Buffer_AppendCharUnchecked (enc, '\"'); + + + if (enc->forceASCII) + { + if (!Buffer_EscapeStringValidated(obj, enc, value, value + szlen)) + { + enc->endTypeContext(obj, &tc); + enc->level --; + return; + } + } + else + { + if (!Buffer_EscapeStringUnvalidated(obj, enc, value, value + 
szlen)) + { + enc->endTypeContext(obj, &tc); + enc->level --; + return; + } + } + + Buffer_AppendCharUnchecked (enc, '\"'); + break; + } + } + + enc->endTypeContext(obj, &tc); + enc->level --; + +} + +char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t _cbBuffer) +{ + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) + { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) + { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) + { + _cbBuffer = 32768; + enc->start = (char *) enc->malloc (_cbBuffer); + enc->heap = 1; + } + else + { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + + encode (obj, enc, NULL, 0); + + Buffer_Reserve(enc, 1); + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; +} diff --git a/pandas/src/ujson/python/JSONtoObj.c b/pandas/src/ujson/python/JSONtoObj.c new file mode 100644 index 0000000000000..cd89b02255f9e --- /dev/null +++ b/pandas/src/ujson/python/JSONtoObj.c @@ -0,0 +1,650 @@ +#include +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#define NO_IMPORT_ARRAY +#include +#include + + +typedef struct __PyObjectDecoder +{ + JSONObjectDecoder dec; + + void* npyarr; // Numpy context buffer + npy_intp curdim; // Current array dimension + + PyArray_Descr* dtype; +} PyObjectDecoder; + +typedef struct __NpyArrContext +{ + PyObject* ret; + PyObject* labels[2]; + PyArray_Dims shape; + + PyObjectDecoder* dec; + + npy_intp i; + npy_intp elsize; + npy_intp elcount; +} NpyArrContext; + +//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) +#define PRINTMARK() + +// Numpy handling based on numpy internal code, specifically 
the function +// PyArray_FromIter. + +// numpy related functions are inter-dependent so declare them all here, +// to ensure the compiler catches any errors + +// standard numpy array handling +JSOBJ Object_npyNewArray(void* decoder); +JSOBJ Object_npyEndArray(JSOBJ obj); +int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value); + +// for more complex dtypes (object and string) fill a standard Python list +// and convert to a numpy array when done. +JSOBJ Object_npyNewArrayList(void* decoder); +JSOBJ Object_npyEndArrayList(JSOBJ obj); +int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value); + +// labelled support, encode keys and values of JS object into separate numpy +// arrays +JSOBJ Object_npyNewObject(void* decoder); +JSOBJ Object_npyEndObject(JSOBJ obj); +int Object_npyObjectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value); + + +// free the numpy context buffer +void Npy_releaseContext(NpyArrContext* npyarr) +{ + PRINTMARK(); + if (npyarr) + { + if (npyarr->shape.ptr) + { + PyObject_Free(npyarr->shape.ptr); + } + if (npyarr->dec) + { + // Don't set to null, used to make sure we don't Py_DECREF npyarr + // in releaseObject + // npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } +} + +JSOBJ Object_npyNewArray(void* _decoder) +{ + PRINTMARK(); + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + NpyArrContext* npyarr; + if (decoder->curdim <= 0) + { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + + if (!npyarr) + { + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp)*NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } + else + { + // starting a new dimension continue the 
current array (and reshape after) + npyarr = (NpyArrContext*) decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) + { + npyarr->shape.len++; + } + } + + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; +} + +JSOBJ Object_npyEndArray(JSOBJ obj) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return NULL; + } + + PyObject* ret = npyarr->ret; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i = npyarr->i; + char* new_data; + + npyarr->dec->curdim--; + + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. + if (npyarr->dec->dtype) + { + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } + else if (npyarr->dec->curdim <= 0) + { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((char*)PyArray_DATA(ret)) = new_data; + } + + if (npyarr->dec->curdim <= 0) + { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) + { + npyarr->ret = PyArray_Newshape((PyArrayObject*) ret, &npyarr->shape, NPY_ANYORDER); + Py_DECREF(ret); + ret = npyarr->ret; + } + + if (npyarr->labels[0] || npyarr->labels[1]) + { + // finished decoding, build tuple with values and labels + ret = PyTuple_New(npyarr->shape.len+1); + for (i = 0; i < npyarr->shape.len; i++) + { + if (npyarr->labels[i]) + { + PyTuple_SET_ITEM(ret, i+1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } + else + { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i+1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } + + return ret; +} + +int Object_npyArrayAddItem(JSOBJ obj, JSOBJ value) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return 0; + } + + PyObject* 
type; + PyArray_Descr* dtype; + npy_intp i = npyarr->i; + char *new_data, *item; + + npyarr->shape.ptr[npyarr->dec->curdim-1]++; + + if (PyArray_Check(value)) + { + // multidimensional array, keep decoding values. + return 1; + } + + if (!npyarr->ret) + { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) + { + type = PyObject_Type(value); + if(!PyArray_DescrConverter(type, &dtype)) + { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } + else + { + dtype = PyArray_DescrNew(npyarr->dec->dtype); + } + + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) + { + Py_XDECREF(dtype); + + if (npyarr->dec->curdim > 1) + { + PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); + goto fail; + } + npyarr->ret = PyList_New(0); + if (!npyarr->ret) + { + goto fail; + } + ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArrayList; + ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayListAddItem; + ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArrayList; + return Object_npyArrayListAddItem(obj, value); + } + + npyarr->ret = PyArray_NewFromDescr(&PyArray_Type, dtype, 1, + &npyarr->elcount, NULL,NULL, 0, NULL); + + if (!npyarr->ret) + { + goto fail; + } + } + + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) + { + PyErr_SetString(PyExc_ValueError, "Cannot decode multidimensional arrays with variable length elements to numpy"); + goto fail; + } + + npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP/npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), npyarr->elcount * npyarr->elsize); + } + else { + PyErr_NoMemory(); + goto fail; + } + ((char*)PyArray_DATA(npyarr->ret)) = new_data; + } + + PyArray_DIMS(npyarr->ret)[0] = i + 1; + + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL + || PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF( (PyObject *) value); + npyarr->i++; + return 1; + +fail: + + Npy_releaseContext(npyarr); + return 0; +} + +JSOBJ Object_npyNewArrayList(void* _decoder) +{ + PRINTMARK(); + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + PyErr_SetString(PyExc_ValueError, "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; +} + +JSOBJ Object_npyEndArrayList(JSOBJ obj) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return NULL; + } + + // convert decoded list to numpy array + PyObject* list = (PyObject *) npyarr->ret; + PyObject* ret = PyArray_FROM_O(list); + + ((JSONObjectDecoder*)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder*)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder*)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; +} + +int Object_npyArrayListAddItem(JSOBJ obj, JSOBJ value) +{ + PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return 0; + } + PyList_Append((PyObject*) npyarr->ret, value); + Py_DECREF( (PyObject *) value); + return 1; +} + + +JSOBJ Object_npyNewObject(void* _decoder) +{ + PRINTMARK(); + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + if (decoder->curdim > 1) + { + PyErr_SetString(PyExc_ValueError, "labels only supported up to 2 dimensions"); + return NULL; + } + + return ((JSONObjectDecoder*)decoder)->newArray(decoder); +} + +JSOBJ Object_npyEndObject(JSOBJ obj) +{ + 
PRINTMARK(); + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return NULL; + } + + npy_intp labelidx = npyarr->dec->curdim-1; + + PyObject* list = npyarr->labels[labelidx]; + if (list) + { + npyarr->labels[labelidx] = PyArray_FROM_O(list); + Py_DECREF(list); + } + + return (PyObject*) ((JSONObjectDecoder*)npyarr->dec)->endArray(obj); +} + +int Object_npyObjectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value) +{ + PRINTMARK(); + // add key to label array, value to values array + NpyArrContext* npyarr = (NpyArrContext*) obj; + if (!npyarr) + { + return 0; + } + + PyObject* label = (PyObject*) name; + npy_intp labelidx = npyarr->dec->curdim-1; + + if (!npyarr->labels[labelidx]) + { + npyarr->labels[labelidx] = PyList_New(0); + } + + // only fill label array once, assumes all column labels are the same + // for 2-dimensional arrays. + if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) + { + PyList_Append(npyarr->labels[labelidx], label); + } + + if(((JSONObjectDecoder*)npyarr->dec)->arrayAddItem(obj, value)) + { + Py_DECREF(label); + return 1; + } + return 0; +} + +int Object_objectAddKey(JSOBJ obj, JSOBJ name, JSOBJ value) +{ + PyDict_SetItem (obj, name, value); + Py_DECREF( (PyObject *) name); + Py_DECREF( (PyObject *) value); + return 1; +} + +int Object_arrayAddItem(JSOBJ obj, JSOBJ value) +{ + PyList_Append(obj, value); + Py_DECREF( (PyObject *) value); + return 1; +} + +JSOBJ Object_newString(wchar_t *start, wchar_t *end) +{ + return PyUnicode_FromWideChar (start, (end - start)); +} + +JSOBJ Object_newTrue(void) +{ + Py_RETURN_TRUE; +} + +JSOBJ Object_newFalse(void) +{ + Py_RETURN_FALSE; +} + +JSOBJ Object_newNull(void) +{ + Py_RETURN_NONE; +} + +JSOBJ Object_newObject(void* decoder) +{ + return PyDict_New(); +} + +JSOBJ Object_endObject(JSOBJ obj) +{ + return obj; +} + +JSOBJ Object_newArray(void* decoder) +{ + return PyList_New(0); +} + +JSOBJ Object_endArray(JSOBJ obj) +{ + return obj; +} + +JSOBJ Object_newInteger(JSINT32 value) +{ 
+ return PyInt_FromLong( (long) value); +} + +JSOBJ Object_newLong(JSINT64 value) +{ + return PyLong_FromLongLong (value); +} + +JSOBJ Object_newDouble(double value) +{ + return PyFloat_FromDouble(value); +} + +static void Object_releaseObject(JSOBJ obj, void* _decoder) +{ + PyObjectDecoder* decoder = (PyObjectDecoder*) _decoder; + if (obj != decoder->npyarr) + { + Py_XDECREF( ((PyObject *)obj)); + } +} + + +PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs) +{ + PRINTMARK(); + static char *kwlist[] = { "obj", "numpy", "labelled", "dtype", NULL}; + + PyObject *ret; + PyObject *sarg; + PyArray_Descr *dtype = NULL; + int numpy = 0, labelled = 0, decref = 0; + + PyObjectDecoder pyDecoder = + { + { + Object_newString, + Object_objectAddKey, + Object_arrayAddItem, + Object_newTrue, + Object_newFalse, + Object_newNull, + Object_newObject, + Object_endObject, + Object_newArray, + Object_endArray, + Object_newInteger, + Object_newLong, + Object_newDouble, + Object_releaseObject, + PyObject_Malloc, + PyObject_Free, + PyObject_Realloc, + } + }; + + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + + JSONObjectDecoder* decoder = (JSONObjectDecoder*) &pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|iiO&", kwlist, &sarg, &numpy, &labelled, PyArray_DescrConverter, &dtype)) + { + return NULL; + } + + if (PyUnicode_Check(sarg)) + { + sarg = PyUnicode_AsUTF8String(sarg); + if (sarg == NULL) + { + //Exception raised above us by codec according to docs + return NULL; + } + decref = 1; + } + else + if (!PyString_Check(sarg)) + { + PyErr_Format(PyExc_TypeError, "Expected String or Unicode"); + return NULL; + } + + if (numpy) + { + pyDecoder.dtype = dtype; + decoder->newArray = Object_npyNewArray; + decoder->endArray = Object_npyEndArray; + decoder->arrayAddItem = Object_npyArrayAddItem; + + if (labelled) + { + decoder->newObject = Object_npyNewObject; + decoder->endObject = Object_npyEndObject; + decoder->objectAddKey = Object_npyObjectAddKey; + } + 
} + + decoder->errorStr = NULL; + decoder->errorOffset = NULL; + + PRINTMARK(); + ret = JSON_DecodeObject(decoder, PyString_AS_STRING(sarg), PyString_GET_SIZE(sarg)); + PRINTMARK(); + + if (decref) + { + Py_DECREF(sarg); + } + + if (PyErr_Occurred()) + { + return NULL; + } + + if (decoder->errorStr) + { + /*FIXME: It's possible to give a much nicer error message here with actual failing element in input etc*/ + PyErr_Format (PyExc_ValueError, "%s", decoder->errorStr); + Py_XDECREF( (PyObject *) ret); + Npy_releaseContext(pyDecoder.npyarr); + + return NULL; + } + + return ret; +} + +PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs) +{ + PyObject *file; + PyObject *read; + PyObject *string; + PyObject *result; + PyObject *argtuple; + + if (!PyArg_ParseTuple (args, "O", &file)) { + return NULL; + } + + if (!PyObject_HasAttrString (file, "read")) + { + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + read = PyObject_GetAttrString (file, "read"); + + if (!PyCallable_Check (read)) { + Py_XDECREF(read); + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + string = PyObject_CallObject (read, NULL); + Py_XDECREF(read); + + if (string == NULL) + { + return NULL; + } + + argtuple = PyTuple_Pack(1, string); + + result = JSONToObj (self, argtuple, kwargs); + Py_XDECREF(string); + Py_DECREF(argtuple); + + if (result == NULL) { + return NULL; + } + + return result; +} + diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c new file mode 100644 index 0000000000000..511da32d6f4f7 --- /dev/null +++ b/pandas/src/ujson/python/objToJSON.c @@ -0,0 +1,1604 @@ +#include +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#include +#include +#include +#include +#include + +#define EPOCH_ORD 719163 + +static PyObject* cls_dataframe; +static PyObject* cls_series; +static PyObject* cls_index; + +typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, void *outValue, size_t *_outLen); + + 
+#if (PY_VERSION_HEX < 0x02050000) +typedef ssize_t Py_ssize_t; +#endif + +/* Cursor over a numpy array during encoding; allocated in NpyArr_iterBegin and handed to nested dimensions through PyObjectEncoder.npyCtxtPassthru. */ +typedef struct __NpyArrContext +{ + PyObject *array; + char* dataptr; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + PyArray_GetItemFunc* getitem; + + char** rowLabels; + char** columnLabels; +} NpyArrContext; + +/* Per-object encoder state, overlaid on JSONTypeContext.prv (see GET_TC): the iteration callbacks chosen for the object plus whatever those callbacks need to remember between calls. */ +typedef struct __TypeContext +{ + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToJSON PyTypeToJSON; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + char *citemName; + + JSINT64 longValue; + + NpyArrContext *npyarr; + int transpose; + char** rowLabels; + char** columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; + +} TypeContext; + +/* JSONObjectEncoder extended with pandas-specific state; enc must stay the first member because the code casts between the two pointer types. */ +typedef struct __PyObjectEncoder +{ + JSONObjectEncoder enc; + + // pass through the NpyArrContext when encoding multi-dimensional arrays + NpyArrContext* npyCtxtPassthru; + + // output format style for pandas data types + int outputFormat; + int originalOutputFormat; +} PyObjectEncoder; + +#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) + +struct PyDictIterState +{ + PyObject *keys; + size_t i; + size_t sz; +}; + +/* Values of PyObjectEncoder.outputFormat; objToJSON maps the "orient" keyword onto these. */ +enum PANDAS_FORMAT +{ + SPLIT, + RECORDS, + INDEX, + COLUMNS, + VALUES +}; + +//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) +#define PRINTMARK() + +/* Module initialisation: import the datetime C-API, look up the DataFrame, Index and Series classes on pandas.core.frame, then initialise numpy. */ +void initObjToJSON(void) +{ + PyDateTime_IMPORT; + + PyObject *mod_frame = PyImport_ImportModule("pandas.core.frame"); + if (mod_frame) + { + cls_dataframe = PyObject_GetAttrString(mod_frame, "DataFrame"); + cls_index = PyObject_GetAttrString(mod_frame, "Index"); + cls_series = PyObject_GetAttrString(mod_frame, "Series"); + Py_DECREF(mod_frame); +
} + + /* Initialise numpy API */ + import_array(); +} + +static void *PyIntToINT32(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *((JSINT32 *) outValue) = PyInt_AS_LONG (obj); + return NULL; +} + +static void *PyIntToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *((JSINT64 *) outValue) = PyInt_AS_LONG (obj); + return NULL; +} + +static void *PyLongToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + *((JSINT64 *) outValue) = GET_TC(tc)->longValue; + return NULL; +} + +/* Convert a numpy half-precision scalar to a C double in *outValue. */ +static void *NpyHalfToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + /* npy_half is the 16-bit storage type PyArray_ScalarAsCtype writes; reading it through a wider, uninitialised integer only worked by accident on little-endian hosts */ + npy_half ctype; + PyArray_ScalarAsCtype(obj, &ctype); + *((double *) outValue) = npy_half_to_double (ctype); + return NULL; +} + +static void *NpyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyArray_CastScalarToCtype(obj, outValue, PyArray_DescrFromType(NPY_DOUBLE)); + return NULL; +} + +static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *((double *) outValue) = PyFloat_AS_DOUBLE (obj); + return NULL; +} + +static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + *_outLen = PyString_GET_SIZE(obj); + return PyString_AS_STRING(obj); +} + +/* Encode a unicode object to UTF-8. The encoded string is parked in the type context's newObj (released by Object_endTypeContext) and its buffer is returned with the length in *_outLen. */ +static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyObject *newObj = PyUnicode_EncodeUTF8 (PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), NULL); + + GET_TC(tc)->newObj = newObj; + + if (!newObj) + { + /* encoding failed and a Python exception is pending (objToJSON checks PyErr_Occurred after encoding); hand back an empty string rather than dereferencing NULL */ + *_outLen = 0; + return (void *) ""; + } + + *_outLen = PyString_GET_SIZE(newObj); + return PyString_AS_STRING(newObj); +} + +static void *NpyDateTimeToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t
*_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyArray_CastScalarToCtype(obj, outValue, PyArray_DescrFromType(NPY_DATETIME)); + return NULL; +} + +/* Convert a datetime.datetime to whole seconds since the Unix epoch in *outValue, using the naive field values. On failure *outValue is 0 and a Python exception is left set. */ +static void *PyDateTimeToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyObject *date, *ord; + int y, m, d, h, mn, s, days; + + y = PyDateTime_GET_YEAR(obj); + m = PyDateTime_GET_MONTH(obj); + d = PyDateTime_GET_DAY(obj); + h = PyDateTime_DATE_GET_HOUR(obj); + mn = PyDateTime_DATE_GET_MINUTE(obj); + s = PyDateTime_DATE_GET_SECOND(obj); + + /* both temporaries are new references and must be released; previously they leaked on every call */ + date = PyDate_FromDate(y, m, 1); + ord = date ? PyObject_CallMethod(date, "toordinal", NULL) : NULL; + Py_XDECREF(date); + if (!ord) + { + *( (JSINT64 *) outValue) = 0; + return NULL; + } + + days = PyInt_AS_LONG(ord) - EPOCH_ORD + d - 1; + Py_DECREF(ord); + + /* widen before multiplying so far-future dates do not overflow int */ + *( (JSINT64 *) outValue) = ((((JSINT64) days * 24 + h) * 60 + mn) * 60 + s); + return NULL; +} + +/* Convert a datetime.date to seconds since the Unix epoch (midnight of that day) in *outValue. On failure *outValue is 0 and a Python exception is left set. */ +static void *PyDateToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) +{ + PyObject *obj = (PyObject *) _obj; + PyObject *date, *ord; + int y, m, d, days; + + y = PyDateTime_GET_YEAR(obj); + m = PyDateTime_GET_MONTH(obj); + d = PyDateTime_GET_DAY(obj); + + /* both temporaries are new references and must be released */ + date = PyDate_FromDate(y, m, 1); + ord = date ? PyObject_CallMethod(date, "toordinal", NULL) : NULL; + Py_XDECREF(date); + if (!ord) + { + *( (JSINT64 *) outValue) = 0; + return NULL; + } + + days = PyInt_AS_LONG(ord) - EPOCH_ORD + d - 1; + Py_DECREF(ord); + *( (JSINT64 *) outValue) = ((JSINT64) days * 86400); + + return NULL; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= +int NpyArr_iterNextNone(JSOBJ _obj, JSONTypeContext *tc) +{ + return 0; +} + +void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) +{ + PyArrayObject *obj; + + if (GET_TC(tc)->newObj) + { + obj = (PyArrayObject *) GET_TC(tc)->newObj; + } + else + { + obj = (PyArrayObject *) _obj; + } + + if (PyArray_SIZE(obj) > 0) + { + PRINTMARK(); + NpyArrContext *npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) + { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject*) obj; + npyarr->getitem =
(PyArray_GetItemFunc*) PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + + if (GET_TC(tc)->transpose) + { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } + else + { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; + } + else + { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + } + PRINTMARK(); +} + +/* End iteration for the context that owns the NpyArrContext: release the last fetched element and free the context. */ +void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + if (npyarr) + { + /* itemValue is either an element from getitem (owned) or the array itself set by NpyArr_iterNext (borrowed) */ + if (GET_TC(tc)->itemValue != npyarr->array) + { + Py_XDECREF(GET_TC(tc)->itemValue); + } + GET_TC(tc)->itemValue = NULL; + PyObject_Free(npyarr); + } + PRINTMARK(); +} + +void NpyArrPassThru_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); +} + +/* End iteration of a nested dimension that shares its parent's NpyArrContext: release the last fetched element, then step the shared cursor back out one dimension. */ +void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); + // finished this dimension, reset the data pointer + NpyArrContext* npyarr = GET_TC(tc)->npyarr; + if (GET_TC(tc)->itemValue != npyarr->array) + { + Py_XDECREF(GET_TC(tc)->itemValue); + } + GET_TC(tc)->itemValue = NULL; + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; +} + +/* Fetch the next element of the innermost dimension into itemValue. Returns 1 when an element was fetched, 0 when the dimension is exhausted. */ +int NpyArr_iterNextItem(JSOBJ _obj, JSONTypeContext *tc) +{ + PRINTMARK(); + NpyArrContext* npyarr = GET_TC(tc)->npyarr; + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) + { + return 0; + } + + /* getitem returns a new reference: drop the previous element before fetching the next one, mirroring what Dir_iterNext does for its itemValue */ + if (GET_TC(tc)->itemValue != npyarr->array) + { + Py_XDECREF(GET_TC(tc)->itemValue); + } + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) +{ + PRINTMARK(); + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (npyarr->curdim >= npyarr->ndim ||
npyarr->index[npyarr->stridedim] >= npyarr->dim) + { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + /* hand the shared cursor to the nested encode call and yield the array itself as the value (not a new reference) */ + ((PyObjectEncoder*) tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); + return GET_TC(tc)->itemValue; +} + +/* Return the pre-encoded label for the entry just produced: a column label while iterating items, otherwise a row label for the dimension above. The index was already advanced, hence the - 1. */ +char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + PRINTMARK(); + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + npy_intp idx; + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) + { + idx = npyarr->index[npyarr->stridedim] - 1; + *outLen = strlen(npyarr->columnLabels[idx]); + return npyarr->columnLabels[idx]; + } + else + { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + *outLen = strlen(npyarr->rowLabels[idx]); + return npyarr->rowLabels[idx]; + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE( (PyObject *) obj); + GET_TC(tc)->itemValue = NULL; +} + +/* Advance to the next tuple element; returns 0 once index reaches size. */ +int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + PyObject *item; + + if (GET_TC(tc)->index >= GET_TC(tc)->size) + { + return 0; + } + + item = PyTuple_GET_ITEM (obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index ++; + return 1; +} + +void Tuple_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ +} + +JSOBJ
Tuple_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Tuple_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return NULL; +} + +//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +/* Start iterating the public, non-callable attributes of obj. If PyObject_Dir fails the iteration is simply empty and the Python exception stays set for the caller to report. */ +void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + /* PyList_GET_SIZE does no NULL check, so guard against a failed dir() */ + GET_TC(tc)->size = GET_TC(tc)->attrList ? PyList_GET_SIZE(GET_TC(tc)->attrList) : 0; + PRINTMARK(); +} + +/* Release the last attribute value and the attribute list. */ +void Dir_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + /* attrList is NULL when PyObject_Dir failed in Dir_iterBegin */ + Py_XDECREF( (PyObject *) GET_TC(tc)->attrList); + PRINTMARK(); +} + +/* Advance to the next attribute whose name does not start with '_', that can be read, and that is not callable. Returns 1 with itemName/itemValue set, or 0 at the end. */ +int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) +{ + PyObject *obj = (PyObject *) _obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = NULL; + + + if (itemValue) + { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index ++) + { + PyObject* attr = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + char* attrStr = PyString_AS_STRING(attr); + + if (attrStr[0] == '_') + { + PRINTMARK(); + continue; + } + + itemValue = PyObject_GetAttr(obj, attr); + if (itemValue == NULL) + { + PyErr_Clear(); + PRINTMARK(); + continue; + } + + if (PyCallable_Check(itemValue)) + { + Py_DECREF(itemValue); + PRINTMARK(); + continue; + } + + PRINTMARK(); + itemName = attr; + break; + } + + if (itemName == NULL) + { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index ++; + + PRINTMARK(); + return 1; +} + + + +JSOBJ
Dir_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + PRINTMARK(); + return GET_TC(tc)->itemValue; +} + +/* Return the current attribute name's string buffer and its length. */ +char *Dir_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + PRINTMARK(); + *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); + return PyString_AS_STRING(GET_TC(tc)->itemName); +} + + + + +//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). No refcounting +//============================================================================= +void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE( (PyObject *) obj); +} + +/* Advance to the next list element; the size captured in List_iterBegin bounds the walk. */ +int List_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->index >= GET_TC(tc)->size) + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM (obj, GET_TC(tc)->index); + GET_TC(tc)->index ++; + return 1; +} + +void List_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ +} + +JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *List_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +/* Allocate the 20-byte scratch buffer that holds the current key name ("name" or "data"). */ +void Index_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +/* Yield the Index as two key/value pairs: "name" -> obj.name, then "data" -> obj.values. */ +int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (!GET_TC(tc)->citemName) + { + return 0; + } + + Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "name", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } + else + if (index == 1) + { +
memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void Index_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->citemName) + { + PyObject_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); +} + +JSOBJ Index_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Index_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +/* Allocate the key-name scratch buffer and switch the encoder to VALUES for the objects nested inside the Series; Series_iterEnd restores the original format. */ +void Series_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +/* Yield the Series as three key/value pairs: "name" -> obj.name, "index" -> obj.index, "data" -> obj.values. The previous value is released before each step. */ +int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (!GET_TC(tc)->citemName) + { + return 0; + } + + Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "name", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } + else + if (index == 1) + { + memcpy(GET_TC(tc)->citemName, "index", sizeof(char)*6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } + else + if (index == 2) + { + memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void Series_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + PyObjectEncoder* enc =
(PyObjectEncoder*) tc->encoder; + enc->outputFormat = enc->originalOutputFormat; + if (GET_TC(tc)->citemName) + { + PyObject_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); +} + +JSOBJ Series_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Series_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; +} + +//============================================================================= +// pandas DataFrame iteration functions +//============================================================================= +/* Allocate the key-name scratch buffer and switch the encoder to VALUES for the nested index, columns and data; DataFrame_iterEnd restores the original format. */ +void DataFrame_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->citemName = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series & index + if (!GET_TC(tc)->citemName) + { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +/* Yield the DataFrame as three key/value pairs: "columns" -> obj.columns, "index" -> obj.index, "data" -> obj.values. */ +int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (!GET_TC(tc)->citemName) + { + return 0; + } + + Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) + { + memcpy(GET_TC(tc)->citemName, "columns", sizeof(char)*8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } + else + if (index == 1) + { + memcpy(GET_TC(tc)->citemName, "index", sizeof(char)*6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } + else + if (index == 2) + { + memcpy(GET_TC(tc)->citemName, "data", sizeof(char)*5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "values"); + } + else + { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +/* Restore the encoder's original output format and free the key-name buffer. */ +void DataFrame_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + enc->outputFormat = enc->originalOutputFormat; + if (GET_TC(tc)->citemName) + { + PyObject_Free(GET_TC(tc)->citemName); + } + PRINTMARK(); +} + +JSOBJ
DataFrame_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *DataFrame_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = strlen(GET_TC(tc)->citemName); + return GET_TC(tc)->citemName; +} + +//============================================================================= +// Dict iteration functions +// itemName might converted to string (Python_Str). Do refCounting +// itemValue is borrowed from object (which is dict). No refCounting +//============================================================================= +void Dict_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->index = 0; + PRINTMARK(); +} + +/* Advance to the next dict entry. The key always ends up as an owned str in itemName: unicode keys are UTF-8 encoded, other non-str keys go through PyObject_Str, and str keys get an extra reference. NOTE(review): a NULL result from the two conversions is not checked here. */ +int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + + if (!PyDict_Next ( (PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) + { + PRINTMARK(); + return 0; + } + + if (PyUnicode_Check(GET_TC(tc)->itemName)) + { + GET_TC(tc)->itemName = PyUnicode_EncodeUTF8 ( + PyUnicode_AS_UNICODE(GET_TC(tc)->itemName), + PyUnicode_GET_SIZE(GET_TC(tc)->itemName), + NULL + ); + } + else + if (!PyString_Check(GET_TC(tc)->itemName)) + { + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + } + else + { + Py_INCREF(GET_TC(tc)->itemName); + } + PRINTMARK(); + return 1; +} + +/* Release the last key and the reference to the dict held in dictObj. */ +void Dict_iterEnd(JSOBJ obj, JSONTypeContext *tc) +{ + if (GET_TC(tc)->itemName) + { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); + PRINTMARK(); +} + +JSOBJ Dict_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->itemValue; +} + +char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + *outLen = PyString_GET_SIZE(GET_TC(tc)->itemName); + return PyString_AS_STRING(GET_TC(tc)->itemName); +} + +/* Free an array of len label strings and the array itself; a NULL array is ignored. */ +void NpyArr_freeLabels(char** labels, npy_intp len) +{ + npy_intp i; + + if (labels) + { + for (i
= 0; i < len; i++) + { + PyObject_Free(labels[i]); + } + PyObject_Free(labels); + } +} + +/* Encode the first num entries of labels to JSON text and return them as a freshly allocated array of NUL-terminated strings with surrounding quotes stripped. Free the result with NpyArr_freeLabels. Returns 0 with a Python exception set (or enc->errorMsg set) on failure. NOTE: this function releases the reference to labels; every caller passes the new reference returned by PyObject_GetAttrString and never releases it itself, so it used to leak. A NULL labels (failed attribute lookup) is accepted and reported as failure. */ +char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_intp num) +{ + PRINTMARK(); + npy_intp i, stride, len; + npy_intp bufsize = 32768; + char** ret; + char *dataptr, *cLabel, *encoded, *origend, *origst, *origoffset; + char labelBuffer[bufsize]; + PyArray_GetItemFunc* getitem; + PyObject *item; + + if (!labels) + { + return 0; + } + + if (PyArray_SIZE(labels) < num) + { + PyErr_SetString(PyExc_ValueError, "Label array sizes do not match corresponding data shape"); + Py_DECREF((PyObject *) labels); + return 0; + } + + ret = PyObject_Malloc(sizeof(char*)*num); + if (!ret) + { + PyErr_NoMemory(); + Py_DECREF((PyObject *) labels); + return 0; + } + + for (i = 0; i < num; i++) + { + ret[i] = NULL; + } + + /* the encoder's buffer pointers are borrowed for the label encoding and restored below */ + origst = enc->start; + origend = enc->end; + origoffset = enc->offset; + + stride = PyArray_STRIDE(labels, 0); + dataptr = PyArray_DATA(labels); + getitem = PyArray_DESCR(labels)->f->getitem; + + for (i = 0; i < num; i++) + { + /* getitem returns a new reference that must be released once encoded */ + item = getitem(dataptr, labels); + if (!item) + { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + encoded = cLabel = JSON_EncodeObject(item, enc, labelBuffer, bufsize); + Py_DECREF(item); + + if (PyErr_Occurred() || enc->errorMsg) + { + if (encoded && encoded != labelBuffer) + { + enc->free(encoded); + } + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + // trim off any quotes surrounding the result + if (*cLabel == '\"') + { + cLabel++; + enc->offset -= 2; + *(enc->offset) = '\0'; + } + + len = enc->offset - cLabel + 1; + ret[i] = PyObject_Malloc(sizeof(char)*len); + + if (!ret[i]) + { + PyErr_NoMemory(); + if (encoded != labelBuffer) + { + enc->free(encoded); + } + /* free the labels collected so far instead of dropping the only pointer to them */ + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + memcpy(ret[i], cLabel, sizeof(char)*len); + + /* a label that outgrew labelBuffer was encoded into a heap buffer owned by us */ + if (encoded != labelBuffer) + { + enc->free(encoded); + } + dataptr += stride; + } + + enc->start = origst; + enc->end = origend; + enc->offset = origoffset; + + Py_DECREF((PyObject *) labels); + return ret; +} + +void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc) +{ + PRINTMARK(); + if (!_obj) { + tc->type = JT_INVALID; + return; + } + + PyObject* obj = (PyObject*) _obj; + TypeContext *pc = (TypeContext *) tc->prv; + PyObjectEncoder* enc = (PyObjectEncoder*) tc->encoder; + PyObject *toDictFunc; + + int i; + for (i = 0; i < 32; i++) + { + tc->prv[i] = 0; + } + + if (PyIter_Check(obj) || PyArray_Check(obj)) + { + goto ISITERABLE; + } + +
if (PyBool_Check(obj)) + { + PRINTMARK(); + tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; + return; + } + else + if (PyInt_Check(obj)) + { + PRINTMARK(); +#ifdef _LP64 + pc->PyTypeToJSON = PyIntToINT64; tc->type = JT_LONG; +#else + pc->PyTypeToJSON = PyIntToINT32; tc->type = JT_INT; +#endif + return; + } + else + if (PyLong_Check(obj)) + { + PyObject *exc; + + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + GET_TC(tc)->longValue = PyLong_AsLongLong(obj); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) + { + PRINTMARK(); + tc->type = JT_INVALID; + return; + } + + return; + } + else + if (PyArray_IsScalar(obj, Integer)) + { + PyObject *exc; + + PRINTMARK(); + pc->PyTypeToJSON = PyLongToINT64; + tc->type = JT_LONG; + /* longValue is a JSINT64, so cast to the 64-bit numpy type; NPY_LONG is the platform C long, which is only 32 bits wide on some targets and would fill just part of the field */ + PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue), PyArray_DescrFromType(NPY_INT64)); + + exc = PyErr_Occurred(); + + if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) + { + PRINTMARK(); + tc->type = JT_INVALID; + return; + } + + return; + } + else + if (PyString_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8; + return; + } + else + if (PyUnicode_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyUnicodeToUTF8; tc->type = JT_UTF8; + return; + } + else + if (PyFloat_Check(obj)) + { + PRINTMARK(); + double val = PyFloat_AS_DOUBLE (obj); + /* NaN and +/-inf are emitted as JSON null */ + if (npy_isnan(val) || npy_isinf(val)) + { + tc->type = JT_NULL; + } + else + { + pc->PyTypeToJSON = PyFloatToDOUBLE; tc->type = JT_DOUBLE; + } + return; + } + else + if (PyArray_IsScalar(obj, Float)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyFloatToDOUBLE; tc->type = JT_DOUBLE; + return; + } + else + if (PyArray_IsScalar(obj, Half)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyHalfToDOUBLE; tc->type = JT_DOUBLE; + return; + } + else + if (PyArray_IsScalar(obj, Datetime)) + { + PRINTMARK(); + pc->PyTypeToJSON = NpyDateTimeToINT64; tc->type = JT_LONG; + return; + } + else + if (PyDateTime_Check(obj)) + {
+ PRINTMARK(); + pc->PyTypeToJSON = PyDateTimeToINT64; tc->type = JT_LONG; + return; + } + else + if (PyDate_Check(obj)) + { + PRINTMARK(); + pc->PyTypeToJSON = PyDateToINT64; tc->type = JT_LONG; + return; + } + else + if (obj == Py_None) + { + PRINTMARK(); + tc->type = JT_NULL; + return; + } + + +/* container types: pick the iterator callbacks that match the object */ +ISITERABLE: + + if (PyDict_Check(obj)) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + /* Dict_iterEnd drops this reference */ + Py_INCREF(obj); + + return; + } + else + if (PyList_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } + else + if (PyTuple_Check(obj)) + { + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_index)) + { + /* SPLIT: emit the Index as an object with "name" and "data" keys; any other format emits its values as an array */ + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; + } + + PRINTMARK(); + tc->type = JT_ARRAY; + /* newObj is released by Object_endTypeContext */ + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_series)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; +
pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + /* INDEX/COLUMNS: emit the Series as an object keyed by its pre-encoded index labels; other formats emit a plain array of values */ + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->columnLabelsLen = PyArray_SIZE(obj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + tc->type = JT_INVALID; + return; + } + } + else + { + PRINTMARK(); + tc->type = JT_ARRAY; + } + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyArray_Check(obj)) + { + /* a pending pass-through context means this array is a nested dimension of one already being iterated: reuse the parent's cursor and consume the hand-off */ + if (enc->npyCtxtPassthru) + { + PRINTMARK(); + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + enc->npyCtxtPassthru = NULL; + return; + } + + PRINTMARK(); + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } + else + if (PyObject_TypeCheck(obj, (PyTypeObject*) cls_dataframe)) + { + if (enc->outputFormat == SPLIT) + { + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; + } + + PRINTMARK(); + pc->newObj = PyObject_GetAttrString(obj, "values"); + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext =
NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + if (enc->outputFormat == VALUES) + { + PRINTMARK(); + tc->type = JT_ARRAY; + } + else + if (enc->outputFormat == RECORDS) + { + /* RECORDS: an array of rows, each row keyed by the column labels */ + PRINTMARK(); + tc->type = JT_ARRAY; + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + tc->type = JT_INVALID; + return; + } + } + else + if (enc->outputFormat == INDEX) + { + /* INDEX: outer keys come from the index, inner keys from the columns */ + PRINTMARK(); + tc->type = JT_OBJECT; + pc->rowLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); + if (!pc->rowLabels) + { + tc->type = JT_INVALID; + return; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + tc->type = JT_INVALID; + return; + } + } + else + { + /* remaining format (COLUMNS): outer keys come from the columns, inner keys from the index, and the values array is walked transposed */ + PRINTMARK(); + tc->type = JT_OBJECT; + pc->rowLabelsLen = PyArray_DIM(pc->newObj, 1); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "columns"), (JSONObjectEncoder*) enc, pc->rowLabelsLen); + if (!pc->rowLabels) + { + tc->type = JT_INVALID; + return; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject*) PyObject_GetAttrString(obj, "index"), (JSONObjectEncoder*) enc, pc->columnLabelsLen); + if (!pc->columnLabels) + { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + tc->type = JT_INVALID; + return; + } + pc->transpose = 1; + } + return; + } + + + /* anything else: prefer the object's own toDict() method, if it has one */ + toDictFunc = PyObject_GetAttrString(obj, "toDict"); + + if (toDictFunc) + { + PyObject*
tuple = PyTuple_New(0); + PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); + + /* a toDict() that raises or returns a non-dict is encoded as null */ + if (toDictResult == NULL) + { + PyErr_Clear(); + tc->type = JT_NULL; + return; + } + + if (!PyDict_Check(toDictResult)) + { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + + PRINTMARK(); + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; + return; + } + + /* no toDict attribute: clear the AttributeError and fall back to dir()-based attribute iteration */ + PyErr_Clear(); + + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + + return; +} + + +/* Release what Object_beginTypeContext attached to the context: the temporary newObj and both label arrays. */ +void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) +{ + Py_XDECREF(GET_TC(tc)->newObj); + NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); + NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen); +} + +/* The getters below forward to the PyTypeToJSON converter chosen in Object_beginTypeContext. */ +const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen) +{ + return GET_TC(tc)->PyTypeToJSON (obj, tc, NULL, _outLen); +} + +JSINT64 Object_getLongValue(JSOBJ obj, JSONTypeContext *tc) +{ + JSINT64 ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + + return ret; +} + +JSINT32 Object_getIntValue(JSOBJ obj, JSONTypeContext *tc) +{ + JSINT32 ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; +} + + +double Object_getDoubleValue(JSOBJ obj, JSONTypeContext *tc) +{ + double ret; + GET_TC(tc)->PyTypeToJSON (obj, tc, &ret, NULL); + return ret; +} + +static void Object_releaseObject(JSOBJ _obj) +{ + Py_DECREF( (PyObject *) _obj); +} + + + +/* The Object_iter* functions are trampolines to the per-type callbacks stored in the type context. */ +void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) +{ + GET_TC(tc)->iterBegin(obj, tc); +} + +int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->iterNext(obj, tc); +} + +void Object_iterEnd(JSOBJ obj,
JSONTypeContext *tc) +{ + GET_TC(tc)->iterEnd(obj, tc); +} + +JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) +{ + return GET_TC(tc)->iterGetValue(obj, tc); +} + +char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) +{ + return GET_TC(tc)->iterGetName(obj, tc, outLen); +} + + +PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "obj", "ensure_ascii", "double_precision", "orient", NULL}; + + char buffer[65536]; + char *ret; + PyObject *newobj; + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + char *sOrient = NULL; + int idoublePrecision = 5; // default double precision setting + + PyObjectEncoder pyEncoder = + { + { + Object_beginTypeContext, //void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc); + Object_endTypeContext, //void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc); + Object_getStringValue, //const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc, size_t *_outLen); + Object_getLongValue, //JSLONG (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + Object_getIntValue, //JSLONG (*getLongValue)(JSOBJ obj, JSONTypeContext *tc); + Object_getDoubleValue, //double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc); + Object_iterBegin, //JSPFN_ITERBEGIN iterBegin; + Object_iterNext, //JSPFN_ITERNEXT iterNext; + Object_iterEnd, //JSPFN_ITEREND iterEnd; + Object_iterGetValue, //JSPFN_ITERGETVALUE iterGetValue; + Object_iterGetName, //JSPFN_ITERGETNAME iterGetName; + Object_releaseObject, //void (*releaseValue)(JSONTypeContext *ti); + PyObject_Malloc, //JSPFN_MALLOC malloc; + PyObject_Realloc, //JSPFN_REALLOC realloc; + PyObject_Free, //JSPFN_FREE free; + -1, //recursionMax + idoublePrecision, + 1, //forceAscii + } + }; + JSONObjectEncoder* encoder = (JSONObjectEncoder*) &pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.outputFormat = COLUMNS; + + PRINTMARK(); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Ois", kwlist, &oinput, &oensureAscii, 
&idoublePrecision, &sOrient)) + { + return NULL; + } + + if (sOrient != NULL) + { + if (strcmp(sOrient, "records") == 0) + { + pyEncoder.outputFormat = RECORDS; + } + else + if (strcmp(sOrient, "index") == 0) + { + pyEncoder.outputFormat = INDEX; + } + else + if (strcmp(sOrient, "split") == 0) + { + pyEncoder.outputFormat = SPLIT; + } + else + if (strcmp(sOrient, "values") == 0) + { + pyEncoder.outputFormat = VALUES; + } + else + if (strcmp(sOrient, "columns") != 0) + { + PyErr_Format (PyExc_ValueError, "Invalid value '%s' for option 'orient'", sOrient); + return NULL; + } + } + + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; + + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) + { + encoder->forceASCII = 0; + } + + encoder->doublePrecision = idoublePrecision; + + PRINTMARK(); + ret = JSON_EncodeObject (oinput, encoder, buffer, sizeof (buffer)); + PRINTMARK(); + + if (PyErr_Occurred()) + { + return NULL; + } + + if (encoder->errorMsg) + { + if (ret != buffer) + { + encoder->free (ret); + } + + PyErr_Format (PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + newobj = PyString_FromString (ret); + + if (ret != buffer) + { + encoder->free (ret); + } + + PRINTMARK(); + + return newobj; +} + +PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs) +{ + PyObject *data; + PyObject *file; + PyObject *string; + PyObject *write; + PyObject *argtuple; + + PRINTMARK(); + + if (!PyArg_ParseTuple (args, "OO", &data, &file)) { + return NULL; + } + + if (!PyObject_HasAttrString (file, "write")) + { + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + write = PyObject_GetAttrString (file, "write"); + + if (!PyCallable_Check (write)) { + Py_XDECREF(write); + PyErr_Format (PyExc_TypeError, "expected file"); + return NULL; + } + + argtuple = PyTuple_Pack(1, data); + + string = objToJSON (self, argtuple, kwargs); + + if (string == NULL) + { + Py_XDECREF(write); + Py_XDECREF(argtuple); + return NULL; + } + + 
Py_XDECREF(argtuple); + + argtuple = PyTuple_Pack (1, string); + if (argtuple == NULL) + { + Py_XDECREF(write); + return NULL; + } + if (PyObject_CallObject (write, argtuple) == NULL) + { + Py_XDECREF(write); + Py_XDECREF(argtuple); + return NULL; + } + + Py_XDECREF(write); + Py_DECREF(argtuple); + Py_XDECREF(string); + + PRINTMARK(); + + Py_RETURN_NONE; + + +} + diff --git a/pandas/src/ujson/python/ujson.c b/pandas/src/ujson/python/ujson.c new file mode 100644 index 0000000000000..21f7ba8b106cf --- /dev/null +++ b/pandas/src/ujson/python/ujson.c @@ -0,0 +1,41 @@ +#include +#include "version.h" + +/* objToJSON */ +PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs); +void initObjToJSON(void); + +/* JSONToObj */ +PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs); + +/* objToJSONFile */ +PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs); + +/* JSONFileToObj */ +PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs); + + +static PyMethodDef ujsonMethods[] = { + {"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8. Pass in double_precision to alter the maximum digit precision with doubles"}, + {"decode", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure"}, + {"dumps", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"}, + {"loads", (PyCFunction) JSONToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as string to dict object structure"}, + {"dump", (PyCFunction) objToJSONFile, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON file. 
Use ensure_ascii=false to output UTF-8"}, + {"load", (PyCFunction) JSONFileToObj, METH_VARARGS | METH_KEYWORDS, "Converts JSON as file to dict object structure"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + + +PyMODINIT_FUNC +init_ujson(void) +{ + PyObject *module; + PyObject *version_string; + + initObjToJSON(); + module = Py_InitModule("_ujson", ujsonMethods); + + version_string = PyString_FromString (UJSON_VERSION); + PyModule_AddObject (module, "__version__", version_string); +} diff --git a/pandas/src/ujson/python/version.h b/pandas/src/ujson/python/version.h new file mode 100644 index 0000000000000..9449441411192 --- /dev/null +++ b/pandas/src/ujson/python/version.h @@ -0,0 +1 @@ +#define UJSON_VERSION "1.18" diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 2551eda5350c1..36928754371e8 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1997,6 +1997,143 @@ def test_to_dict(self): for k2, v2 in v.iteritems(): self.assertEqual(v2, recons_data[k][k2]) + def test_from_json_to_json(self): + + def _check_orient(df, orient, dtype=None, numpy=True): + df = df.sort() + dfjson = df.to_json(orient=orient) + unser = DataFrame.from_json(dfjson, orient=orient, dtype=dtype, + numpy=numpy) + unser = unser.sort() + mktimestamp = datetime.fromtimestamp + if df.index.dtype.type == np.datetime64: + unser.index = [mktimestamp(float(d)) for d in unser.index] + if orient == "records": + # index is not captured in this orientation + assert_almost_equal(df.values, unser.values) + self.assert_(df.columns.equals(unser.columns)) + elif orient == "values": + # index and cols are not captured in this orientation + assert_almost_equal(df.values, unser.values) + elif orient == "split": + # index and col labels might not be strings + unser.index = [str(i) for i in unser.index] + unser.columns = [str(i) for i in unser.columns] + unser = unser.sort() + assert_almost_equal(df.values, unser.values) + else: + assert_frame_equal(df, unser) + + 
def _check_all_orients(df, dtype=None): + _check_orient(df, "columns", dtype=dtype) + _check_orient(df, "records", dtype=dtype) + _check_orient(df, "split", dtype=dtype) + _check_orient(df, "index", dtype=dtype) + _check_orient(df, "values", dtype=dtype) + + _check_orient(df, "columns", dtype=dtype, numpy=False) + _check_orient(df, "records", dtype=dtype, numpy=False) + _check_orient(df, "split", dtype=dtype, numpy=False) + _check_orient(df, "index", dtype=dtype, numpy=False) + _check_orient(df, "values", dtype=dtype, numpy=False) + + # basic + _check_all_orients(self.frame) + self.assertEqual(self.frame.to_json(), + self.frame.to_json(orient="columns")) + + _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) + + # big one + # index and columns are strings as all unserialised JSON object keys + # are assumed to be strings + biggie = DataFrame(np.zeros((200, 4)), + columns=[str(i) for i in range(4)], + index=[str(i) for i in range(200)]) + _check_all_orients(biggie) + + # dtypes + _check_all_orients(DataFrame(biggie, dtype=np.float64), + dtype=np.float64) + _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int) + _check_all_orients(DataFrame(biggie, dtype='