diff options
Diffstat (limited to 'pyload/lib/simplejson/_speedups.c')
-rw-r--r-- | pyload/lib/simplejson/_speedups.c | 3339 |
1 files changed, 3339 insertions, 0 deletions
diff --git a/pyload/lib/simplejson/_speedups.c b/pyload/lib/simplejson/_speedups.c new file mode 100644 index 000000000..01614c49a --- /dev/null +++ b/pyload/lib/simplejson/_speedups.c @@ -0,0 +1,3339 @@ +/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */ +#include "Python.h" +#include "structmember.h" + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyString_Check PyBytes_Check +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyInt_Check(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE +#define JSON_ASCII_Check PyUnicode_Check +#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 +#define PyInt_Type PyLong_Type +#define PyInt_FromString PyLong_FromString +#define PY2_UNUSED +#define PY3_UNUSED UNUSED +#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127) +#else /* PY_MAJOR_VERSION >= 3 */ +#define PY2_UNUSED UNUSED +#define PY3_UNUSED +#define PyUnicode_READY(obj) 0 +#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) +#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) +#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) +#define PyUnicode_GetLength PyUnicode_GET_SIZE +#define JSON_UNICHR Py_UNICODE +#define JSON_ASCII_Check PyString_Check +#define JSON_ASCII_AS_STRING PyString_AS_STRING +#define JSON_InternFromString PyString_InternFromString +#define JSON_Intern_GET_SIZE PyString_GET_SIZE +#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0) +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) +#define PyOS_string_to_double json_PyOS_string_to_double +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) +{ + double x; + assert(endptr == NULL); + assert(overflow_exception == NULL); + PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) + x = PyOS_ascii_atof(s); + PyFPE_END_PROTECT(x) + return x; +} +#endif +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif +#if !defined(Py_SIZE) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) +#endif +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#if PY_VERSION_HEX < 0x02050000 +#if !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#define PyInt_FromSsize_t PyInt_FromLong +#define PyInt_AsSsize_t PyInt_AsLong +#endif +#if !defined(Py_IS_FINITE) +#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) +#endif +#endif /* PY_VERSION_HEX < 0x02050000 */ + +#ifdef __GNUC__ +#define UNUSED __attribute__((__unused__)) +#else +#define UNUSED +#endif + +#define DEFAULT_ENCODING "utf-8" + +#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) +#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) +#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) +#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) + +#define JSON_ALLOW_NAN 1 +#define JSON_IGNORE_NAN 2 + +static PyTypeObject PyScannerType; +static PyTypeObject PyEncoderType; + +typedef struct { + PyObject *large_strings; /* A list of previously accumulated large strings */ + PyObject *small_strings; /* Pending small strings */ +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + +#define ERR_EXPECTING_VALUE "Expecting value" +#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'" +#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'" +#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'" +#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes" +#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'" +#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter" +#define ERR_STRING_UNTERMINATED "Unterminated string starting at" +#define ERR_STRING_CONTROL "Invalid control character %r at" +#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r" +#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence" + +typedef struct _PyScannerObject { + PyObject_HEAD + PyObject *encoding; + PyObject *strict; + PyObject *object_hook; + PyObject *pairs_hook; + PyObject *parse_float; + PyObject *parse_int; + PyObject *parse_constant; + PyObject *memo; +} PyScannerObject; + +static PyMemberDef scanner_members[] = { + {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, + {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, + {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, + {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, + {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, + {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, + {NULL} +}; + +typedef struct _PyEncoderObject { + PyObject_HEAD + PyObject *markers; + PyObject *defaultfn; + PyObject *encoder; + PyObject *indent; + PyObject *key_separator; + PyObject *item_separator; + PyObject *sort_keys; + PyObject *key_memo; + PyObject *encoding; + PyObject *Decimal; + PyObject *skipkeys_bool; + int skipkeys; + int fast_encode; + /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */ + int allow_or_ignore_nan; + int use_decimal; + int namedtuple_as_object; + int tuple_as_array; + PyObject *max_long_size; + PyObject *min_long_size; + PyObject *item_sort_key; + PyObject *item_sort_kw; + int for_json; +} PyEncoderObject; + +static PyMemberDef encoder_members[] = { + {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, + {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, + {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"}, + {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, + {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, + {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, + {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, + /* Python 2.5 does not support T_BOOl */ + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"}, + {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, + {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, + {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"}, + {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"}, + {NULL} +}; + +static PyObject * +join_list_unicode(PyObject *lst); +static PyObject * +JSON_ParseEncoding(PyObject *encoding); +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c); +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj); +static Py_ssize_t +ascii_char_size(JSON_UNICHR c); +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); +#if PY_MAJOR_VERSION < 3 +static PyObject * +join_list_string(PyObject *lst); +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +#endif +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +scanner_dealloc(PyObject *self); +static int +scanner_clear(PyObject *self); +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds); +static void +encoder_dealloc(PyObject *self); +static int +encoder_clear(PyObject *self); +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key); +static int +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *obj); +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj); +static int +_is_namedtuple(PyObject *obj); +static int +_has_for_json_hook(PyObject *obj); +static PyObject * +moduleinit(void); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') +#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) + +#define MIN_EXPANSION 6 + +static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large_strings = NULL; + acc->small_strings = PyList_New(0); + if (acc->small_strings == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large_strings == NULL) { + acc->large_strings = PyList_New(0); + if (acc->large_strings == NULL) + return -1; + } +#if PY_MAJOR_VERSION >= 3 + joined = join_list_unicode(acc->small_strings); +#else /* PY_MAJOR_VERSION >= 3 */ + joined = join_list_string(acc->small_strings); +#endif /* PY_MAJOR_VERSION < 3 */ + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large_strings, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small_strings, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small_strings); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small_strings); + if (ret) { + Py_CLEAR(acc->large_strings); + return NULL; + } + res = acc->large_strings; + acc->large_strings = NULL; + if (res == NULL) + return PyList_New(0); + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small_strings); + Py_CLEAR(acc->large_strings); +} + +static int +IS_DIGIT(JSON_UNICHR c) +{ + return c >= '0' && c <= '9'; +} + +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *rval = PyUnicode_New(1, c); + if (rval) + PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); + return rval; +#else /* PY_MAJOR_VERSION >= 3 */ + return PyUnicode_FromUnicode(&c, 1); +#endif /* PY_MAJOR_VERSION < 3 */ +} + +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj) +{ + if (s->max_long_size != Py_None && s->min_long_size != Py_None) { + if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) || + PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); +#else + PyObject* quoted = PyString_FromFormat("\"%s\"", + PyString_AsString(encoded)); +#endif + Py_DECREF(encoded); + encoded = quoted; + } + } + + return encoded; +} + +static int +_is_namedtuple(PyObject *obj) +{ + int rval = 0; + PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict"); + if (_asdict == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(_asdict); + Py_DECREF(_asdict); + return rval; +} + +static int +_has_for_json_hook(PyObject *obj) +{ + int rval = 0; + PyObject *for_json = PyObject_GetAttrString(obj, "for_json"); + if (for_json == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(for_json); + Py_DECREF(for_json); + return rval; +} + +static int +_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) +{ + /* PyObject to Py_ssize_t converter */ + *size_ptr = PyInt_AsSsize_t(o); + if (*size_ptr == -1 && PyErr_Occurred()) + return 0; + return 1; +} + +static PyObject * +_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) +{ + /* Py_ssize_t to PyObject converter */ + return PyInt_FromSsize_t(*size_ptr); +} + +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) +{ + /* Escape unicode code point c to ASCII escape sequences + in char *output. output must have at least 12 bytes unused to + accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } + } + return chars; +} + +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c == '\t') { + return 2; + } +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + else if (c >= 0x10000U) { + return 2 * MIN_EXPANSION; + } +#endif + else { + return MIN_EXPANSION; + } +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PY2_UNUSED int kind; + void *data; + PyObject *rval; + char *output; + + if (PyUnicode_READY(pystr)) + return NULL; + + kind = PyUnicode_KIND(pystr); + data = PyUnicode_DATA(pystr); + input_chars = PyUnicode_GetLength(pystr); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); + output = (char *)PyUnicode_DATA(rval); +#else + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); +#endif + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} + +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + if (input == NULL) + return NULL; + rval = ascii_escape_unicode(input); + Py_DECREF(input); + return rval; +} + +#else /* PY_MAJOR_VERSION >= 3 */ + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + /* Take a PyString pystr and return a new ASCII-only escaped PyString */ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + output_size = 2; + + /* Fast path for a string that's already ASCII */ + for (i = 0; i < input_chars; i++) { + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } + output_size += ascii_char_size(c); + } + + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + chars = 0; + output = PyString_AS_STRING(rval); + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); + } + output[chars++] = '"'; + assert(chars == output_size); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key) +{ + if (PyUnicode_Check(key)) { + Py_INCREF(key); + return key; + } + else if (PyString_Check(key)) { +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_Decode( + PyString_AS_STRING(key), + PyString_GET_SIZE(key), + JSON_ASCII_AS_STRING(s->encoding), + NULL); +#else /* PY_MAJOR_VERSION >= 3 */ + Py_INCREF(key); + return key; +#endif /* PY_MAJOR_VERSION < 3 */ + } + else if (PyFloat_Check(key)) { + return encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + return _encoded_const(key); + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + return PyObject_Str(key); + } + else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { + return PyObject_Str(key); + } + else if (s->skipkeys) { + Py_INCREF(Py_None); + return Py_None; + } + PyErr_SetString(PyExc_TypeError, "keys must be a string"); + return NULL; +} + +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + static PyObject *sortfun = NULL; + static PyObject *sortargs = NULL; + + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } + + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + return NULL; + if (s->item_sort_kw == Py_None) + return iter; + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(key)) { + /* item can be added as-is */ + } +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyUnicode_Check(key)) { + /* item can be added as-is */ + } + else { + PyObject *tpl; + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(kstr); + continue; + } + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + tpl = PyTuple_Pack(2, kstr, value); + if (tpl == NULL) + goto bail; + Py_CLEAR(kstr); + Py_DECREF(item); + item = tpl; + } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) + goto bail; + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; +} + +static void +raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) +{ + /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ + static PyObject *JSONDecodeError = NULL; + PyObject *exc; + if (JSONDecodeError == NULL) { + PyObject *scanner = PyImport_ImportModule("simplejson.scanner"); + if (scanner == NULL) + return; + JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError"); + Py_DECREF(scanner); + if (JSONDecodeError == NULL) + return; + } + exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (exc) { + PyErr_SetObject(JSONDecodeError, exc); + Py_DECREF(exc); + } +} + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return u''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = JSON_NewEmptyUnicode(); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} + +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ +static PyObject * +join_list_string(PyObject *lst) +{ + /* return ''.join(lst) */ + static PyObject *joinfn = NULL; + if (joinfn == NULL) { + PyObject *ustr = PyString_FromStringAndSize(NULL, 0); + if (ustr == NULL) + return NULL; + + joinfn = PyObject_GetAttrString(ustr, "join"); + Py_DECREF(ustr); + if (joinfn == NULL) + return NULL; + } + return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) +{ + /* return (rval, idx) tuple, stealing reference to rval */ + PyObject *tpl; + PyObject *pyidx; + /* + steal a reference to rval, returns (rval, idx) + */ + if (rval == NULL) { + assert(PyErr_Occurred()); + return NULL; + } + pyidx = PyInt_FromSsize_t(idx); + if (pyidx == NULL) { + Py_DECREF(rval); + return NULL; + } + tpl = PyTuple_New(2); + if (tpl == NULL) { + Py_DECREF(pyidx); + Py_DECREF(rval); + return NULL; + } + PyTuple_SET_ITEM(tpl, 0, rval); + PyTuple_SET_ITEM(tpl, 1, pyidx); + return tpl; +} + +#define APPEND_OLD_CHUNK \ + if (chunk != NULL) { \ + if (chunks == NULL) { \ + chunks = PyList_New(0); \ + if (chunks == NULL) { \ + goto bail; \ + } \ + } \ + if (PyList_Append(chunks, chunk)) { \ + goto bail; \ + } \ + Py_CLEAR(chunk); \ + } + +#if PY_MAJOR_VERSION < 3 +static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyString pystr. + end is the index of the first character after the quote. + encoding is the encoding of pystr (must be an ASCII superset) + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyString (if ASCII-only) or PyUnicode + */ + PyObject *rval; + Py_ssize_t len = PyString_GET_SIZE(pystr); + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + int has_unicode = 0; + char *buf = PyString_AS_STRING(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + PyObject *strchunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + else if (end < 0 || len < end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + Py_UNICODE c = 0; + for (next = end; next < len; next++) { + c = (unsigned char)buf[next]; + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg(ERR_STRING_CONTROL, pystr, next); + goto bail; + } + else if (c > 0x7f) { + has_unicode = 1; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + if (!has_unicode) { + chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); + } + else { + chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); + } + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + strchunk = PyString_FromStringAndSize(&buf[end], next - end); + if (strchunk == NULL) { + goto bail; + } + if (has_unicode) { + chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); + Py_DECREF(strchunk); + if (chunk == NULL) { + goto bail; + } + } + else { + chunk = strchunk; + } +#endif /* PY_MAJOR_VERSION < 3 */ + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + c = buf[next]; + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } +#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { + JSON_UNICHR c2 = 0; + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + c2 <<= 4; + JSON_UNICHR digit = buf[next]; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + } + } +#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ + } + if (c > 0x7f) { + has_unicode = 1; + } + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + if (has_unicode) { + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } + } + else { + char c_char = Py_CHARMASK(c); + chunk = PyString_FromStringAndSize(&c_char, 1); + if (chunk == NULL) { + goto bail; + } + } +#endif + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = JSON_NewEmptyUnicode(); + } + else { + APPEND_OLD_CHUNK + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + } + + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunk); + Py_XDECREF(chunks); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) +{ + /* Read the JSON string from PyUnicode pystr. + end is the index of the first character after the quote. + if strict is zero then literal control characters are allowed + *next_end_ptr is a return-by-reference index of the character + after the end quote + + Return value is a new PyUnicode + */ + PyObject *rval; + Py_ssize_t begin = end - 1; + Py_ssize_t next = begin; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + Py_ssize_t len = PyUnicode_GetLength(pystr); + void *buf = PyUnicode_DATA(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + else if (end < 0 || len < end) { + PyErr_SetString(PyExc_ValueError, "end is out of bounds"); + goto bail; + } + while (1) { + /* Find the end of the string or the next escape */ + JSON_UNICHR c = 0; + for (next = end; next < len; next++) { + c = PyUnicode_READ(kind, buf, next); + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg(ERR_STRING_CONTROL, pystr, next); + goto bail; + } + } + if (!(c == '"' || c == '\\')) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + /* Pick up this chunk if it's not zero length */ + if (next != end) { + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION < 3 + chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end); +#else + chunk = PyUnicode_Substring(pystr, end, next); +#endif + if (chunk == NULL) { + goto bail; + } + } + next++; + if (c == '"') { + end = next; + break; + } + if (next == len) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); + goto bail; + } + c = PyUnicode_READ(kind, buf, next); + if (c != 'u') { + /* Non-unicode backslash escapes */ + end = next + 1; + switch (c) { + case '"': break; + case '\\': break; + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = 0; + } + if (c == 0) { + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); + goto bail; + } + } + else { + c = 0; + next++; + end = next + 4; + if (end >= len) { + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); + goto bail; + } + /* Decode 4 hex digits */ + for (; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) + /* Surrogate pair */ + if ((c & 0xfc00) == 0xd800) { + JSON_UNICHR c2 = 0; + if (end + 6 < len && + PyUnicode_READ(kind, buf, next) == '\\' && + PyUnicode_READ(kind, buf, next + 1) == 'u') { + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c2 <<= 4; + switch (digit) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + c2 |= (digit - '0'); break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + c2 |= (digit - 'a' + 10); break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + c2 |= (digit - 'A' + 10); break; + default: + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); + goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); + } + } + } +#endif + } + APPEND_OLD_CHUNK + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = JSON_NewEmptyUnicode(); + } + else { + APPEND_OLD_CHUNK + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + } + *next_end_ptr = end; + return rval; +bail: + *next_end_ptr = -1; + Py_XDECREF(chunk); + Py_XDECREF(chunks); + return NULL; +} + +PyDoc_STRVAR(pydoc_scanstring, + "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" + "\n" + "Scan the string s for a JSON string. End is the index of the\n" + "character in s after the quote that started the JSON string.\n" + "Unescapes all valid JSON string escape sequences and raises ValueError\n" + "on attempt to decode an invalid string. If strict is False then literal\n" + "control characters are allowed in the string.\n" + "\n" + "Returns a tuple of the decoded string and the index of the character in s\n" + "after the end quote." +); + +static PyObject * +py_scanstring(PyObject* self UNUSED, PyObject *args) +{ + PyObject *pystr; + PyObject *rval; + Py_ssize_t end; + Py_ssize_t next_end = -1; + char *encoding = NULL; + int strict = 1; + if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { + return NULL; + } + if (encoding == NULL) { + encoding = DEFAULT_ENCODING; + } + if (PyUnicode_Check(pystr)) { + rval = scanstring_unicode(pystr, end, strict, &next_end); + } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. */ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + return _build_rval_index_tuple(rval, next_end); +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "Return an ASCII-only JSON representation of a Python string" +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) +{ + /* Return an ASCII-only JSON representation of a Python string */ + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } + else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } +} + +static void +scanner_dealloc(PyObject *self) +{ + /* Deallocate scanner object */ + scanner_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +scanner_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_VISIT(s->encoding); + Py_VISIT(s->strict); + Py_VISIT(s->object_hook); + Py_VISIT(s->pairs_hook); + Py_VISIT(s->parse_float); + Py_VISIT(s->parse_int); + Py_VISIT(s->parse_constant); + Py_VISIT(s->memo); + return 0; +} + +static int +scanner_clear(PyObject *self) +{ + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + Py_CLEAR(s->memo); + return 0; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON object from PyString pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook or + object_pairs_hook can change that) + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; + PyObject *key = NULL; + PyObject *val = NULL; + char *encoding = JSON_ASCII_AS_STRING(s->encoding); + int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; + Py_ssize_t next_idx; + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && str[idx] != '}') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + + /* read key */ + if (str[idx] != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); + if (key == NULL) + goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip whitespace */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + if (idx > end_idx || str[idx] != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* read any JSON data type */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; + if (idx > end_idx) break; + if (str[idx] == '}') { + break; + } + else if (str[idx] != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + } + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || str[idx] != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } + goto bail; + } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(rval); + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(pairs); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON object from PyUnicode pystr. + idx is the index of the first character after the opening curly brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing curly brace. + + Returns a new PyObject (usually a dict, but object_hook can change that) + */ + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; + PyObject *key = NULL; + PyObject *val = NULL; + int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; + Py_ssize_t next_idx; + + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + + /* skip whitespace after { */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* only loop if the object is non-empty */ + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + + /* read key */ + if (PyUnicode_READ(kind, str, idx) != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); + if (key == NULL) + goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } + idx = next_idx; + + /* skip whitespace between key and : delimiter, read :, skip + whitespace */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) + goto bail; + + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } + idx = next_idx; + + /* skip whitespace before } or , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* bail if the object is closed or we didn't get the , + delimiter */ + did_parse = 1; + if (idx > end_idx) break; + if (PyUnicode_READ(kind, str, idx) == '}') { + break; + } + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , delimiter */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be '}' */ + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } + goto bail; + } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + /* if object_hook is not None: rval = object_hook(rval) */ + if (s->object_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); + if (val == NULL) + goto bail; + Py_DECREF(rval); + rval = val; + val = NULL; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(rval); + Py_XDECREF(key); + Py_XDECREF(val); + Py_XDECREF(pairs); + return NULL; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && str[idx] != ']') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + trailing_delimiter = 0; + /* read any JSON term and de-tuplefy the (rval, idx) */ + val = scan_once_str(s, pystr, idx, &next_idx); + if (val == NULL) { + goto bail; + } + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (str[idx] == ']') { + break; + } + else if (str[idx] != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || str[idx] != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON array from PyString pystr. + idx is the index of the first character after the opening brace. + *next_idx_ptr is a return-by-reference index to the first character after + the closing brace. + + Returns a new PyList + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PyObject *val = NULL; + PyObject *rval = PyList_New(0); + Py_ssize_t next_idx; + if (rval == NULL) + return NULL; + + /* skip whitespace after [ */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* only loop if the array is non-empty */ + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { + int trailing_delimiter = 0; + while (idx <= end_idx) { + trailing_delimiter = 0; + /* read any JSON term */ + val = scan_once_unicode(s, pystr, idx, &next_idx); + if (val == NULL) { + goto bail; + } + + if (PyList_Append(rval, val) == -1) + goto bail; + + Py_CLEAR(val); + idx = next_idx; + + /* skip whitespace between term and , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + + /* bail if the array is closed or we didn't get the , delimiter */ + if (idx > end_idx) break; + if (PyUnicode_READ(kind, str, idx) == ']') { + break; + } + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + goto bail; + } + idx++; + + /* skip whitespace after , */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; + } + } + + /* verify that idx < end_idx, str[idx] should be ']' */ + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } + goto bail; + } + *next_idx_ptr = idx + 1; + return rval; +bail: + Py_XDECREF(val); + Py_DECREF(rval); + return NULL; +} + +static PyObject * +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON constant from PyString pystr. + constant is the constant string that was found + ("NaN", "Infinity", "-Infinity"). + idx is the index of the first character of the constant + *next_idx_ptr is a return-by-reference index to the first character after + the constant. + + Returns the result of parse_constant + */ + PyObject *cstr; + PyObject *rval; + /* constant is "NaN", "Infinity", or "-Infinity" */ + cstr = JSON_InternFromString(constant); + if (cstr == NULL) + return NULL; + + /* rval = parse_constant(constant) */ + rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); + idx += JSON_Intern_GET_SIZE(cstr); + Py_DECREF(cstr); + *next_idx_ptr = idx; + return rval; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON number from PyString pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (str[idx] == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + idx++; + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + if (str[idx] >= '1' && str[idx] <= '9') { + idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + /* if it starts with 0 we only expect one integer digit */ + else if (str[idx] == '0') { + idx++; + } + /* no integer digits, error */ + else { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + is_float = 1; + idx += 2; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + + /* save the index of the 'e' or 'E' just in case we need to backtrack */ + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ + numstr = PyString_FromStringAndSize(&str[start], idx - start); + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { + /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ + double d = PyOS_string_to_double(PyString_AS_STRING(numstr), + NULL, NULL); + if (d == -1.0 && PyErr_Occurred()) + return NULL; + rval = PyFloat_FromDouble(d); + } + } + else { + /* parse as an int using a fast path if available, otherwise call user defined method */ + if (s->parse_int != (PyObject *)&PyInt_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + else { + rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); + } + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ + /* Read a JSON number from PyUnicode pystr. + idx is the index of the first character of the number + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of that number: + PyInt, PyLong, or PyFloat. + May return other types if parse_int or parse_float are set + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + Py_ssize_t idx = start; + int is_float = 0; + JSON_UNICHR c; + PyObject *rval; + PyObject *numstr; + + /* read a sign if it's there, make sure it's not the end of the string */ + if (PyUnicode_READ(kind, str, idx) == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + idx++; + } + + /* read as many integer digits as we find as long as it doesn't start with 0 */ + c = PyUnicode_READ(kind, str, idx); + if (c == '0') { + /* if it starts with 0 we only expect one integer digit */ + idx++; + } + else if (IS_DIGIT(c)) { + idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { + idx++; + } + } + else { + /* no integer digits, error */ + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + + /* if the next char is '.' followed by a digit then read all float digits */ + if (idx < end_idx && + PyUnicode_READ(kind, str, idx) == '.' && + IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { + is_float = 1; + idx += 2; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; + } + + /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == 'e' || + PyUnicode_READ(kind, str, idx) == 'E')) { + Py_ssize_t e_start = idx; + idx++; + + /* read an exponent sign if present */ + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == '-' || + PyUnicode_READ(kind, str, idx) == '+')) idx++; + + /* read all digits */ + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; + + /* if we got a digit, then parse as float. if not, backtrack */ + if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { + is_float = 1; + } + else { + idx = e_start; + } + } + + /* copy the section we determined to be a number */ +#if PY_MAJOR_VERSION >= 3 + numstr = PyUnicode_Substring(pystr, start, idx); +#else + numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); +#endif + if (numstr == NULL) + return NULL; + if (is_float) { + /* parse as a float using a fast path if available, otherwise call user defined method */ + if (s->parse_float != (PyObject *)&PyFloat_Type) { + rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); + } + else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else + rval = PyFloat_FromString(numstr, NULL); +#endif + } + } + else { + /* no fast path for unicode -> int, just call */ + rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); + } + Py_DECREF(numstr); + *next_idx_ptr = idx; + return rval; +} + +#if PY_MAJOR_VERSION < 3 +static PyObject * +scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyString pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + char *str = PyString_AS_STRING(pystr); + Py_ssize_t length = PyString_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + switch (str[idx]) { + case '"': + /* string */ + rval = scanstring_str(pystr, idx + 1, + JSON_ASCII_AS_STRING(s->encoding), + PyObject_IsTrue(s->strict), + next_idx_ptr); + break; + case '{': + /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case '[': + /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case 'n': + /* null */ + if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + rval = Py_None; + } + else + fallthrough = 1; + break; + case 't': + /* true */ + if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + rval = Py_True; + } + else + fallthrough = 1; + break; + case 'f': + /* false */ + if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + rval = Py_False; + } + else + fallthrough = 1; + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + default: + fallthrough = 1; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + return rval; +} +#endif /* PY_MAJOR_VERSION < 3 */ + + +static PyObject * +scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ + /* Read one JSON term (of any kind) from PyUnicode pystr. + idx is the index of the first character of the term + *next_idx_ptr is a return-by-reference index to the first character after + the number. + + Returns a new PyObject representation of the term. + */ + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t length = PyUnicode_GetLength(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } + switch (PyUnicode_READ(kind, str, idx)) { + case '"': + /* string */ + rval = scanstring_unicode(pystr, idx + 1, + PyObject_IsTrue(s->strict), + next_idx_ptr); + break; + case '{': + /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) + return NULL; + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case '[': + /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) + return NULL; + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; + case 'n': + /* null */ + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'u' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 'l') { + Py_INCREF(Py_None); + *next_idx_ptr = idx + 4; + rval = Py_None; + } + else + fallthrough = 1; + break; + case 't': + /* true */ + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'r' && + PyUnicode_READ(kind, str, idx + 2) == 'u' && + PyUnicode_READ(kind, str, idx + 3) == 'e') { + Py_INCREF(Py_True); + *next_idx_ptr = idx + 4; + rval = Py_True; + } + else + fallthrough = 1; + break; + case 'f': + /* false */ + if ((idx + 4 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { + Py_INCREF(Py_False); + *next_idx_ptr = idx + 5; + rval = Py_False; + } + else + fallthrough = 1; + break; + case 'N': + /* NaN */ + if ((idx + 2 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') { + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case 'I': + /* Infinity */ + if ((idx + 7 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + case '-': + /* -Infinity */ + if ((idx + 8 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); + } + else + fallthrough = 1; + break; + default: + fallthrough = 1; + } + /* Didn't find a string, object, array, or named constant. Look for a number. */ + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + return rval; +} + +static PyObject * +scanner_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to scan_once_{str,unicode} */ + PyObject *pystr; + PyObject *rval; + Py_ssize_t idx; + Py_ssize_t next_idx = -1; + static char *kwlist[] = {"string", "idx", NULL}; + PyScannerObject *s; + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) + return NULL; + + if (PyUnicode_Check(pystr)) { + rval = scan_once_unicode(s, pystr, idx, &next_idx); + } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ + else { + PyErr_Format(PyExc_TypeError, + "first argument must be a string, not %.80s", + Py_TYPE(pystr)->tp_name); + return NULL; + } + PyDict_Clear(s->memo); + return _build_rval_index_tuple(rval, next_idx); +} + +static PyObject * +scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyScannerObject *s; + s = (PyScannerObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->encoding = NULL; + s->strict = NULL; + s->object_hook = NULL; + s->pairs_hook = NULL; + s->parse_float = NULL; + s->parse_int = NULL; + s->parse_constant = NULL; + } + return (PyObject *)s; +} + +static PyObject * +JSON_ParseEncoding(PyObject *encoding) +{ + if (encoding == NULL) + return NULL; + if (encoding == Py_None) + return JSON_InternFromString(DEFAULT_ENCODING); +#if PY_MAJOR_VERSION < 3 + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif + if (JSON_ASCII_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + PyErr_SetString(PyExc_TypeError, "encoding must be a string"); + return NULL; +} + +static int +scanner_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Initialize Scanner object */ + PyObject *ctx; + static char *kwlist[] = {"context", NULL}; + PyScannerObject *s; + PyObject *encoding; + + assert(PyScanner_Check(self)); + s = (PyScannerObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) + return -1; + + if (s->memo == NULL) { + s->memo = PyDict_New(); + if (s->memo == NULL) + goto bail; + } + + /* JSON_ASCII_AS_STRING is used on encoding */ + encoding = PyObject_GetAttrString(ctx, "encoding"); + s->encoding = JSON_ParseEncoding(encoding); + Py_XDECREF(encoding); + if (s->encoding == NULL) + goto bail; + + /* All of these will fail "gracefully" so we don't need to verify them */ + s->strict = PyObject_GetAttrString(ctx, "strict"); + if (s->strict == NULL) + goto bail; + s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); + if (s->object_hook == NULL) + goto bail; + s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->pairs_hook == NULL) + goto bail; + s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); + if (s->parse_float == NULL) + goto bail; + s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); + if (s->parse_int == NULL) + goto bail; + s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); + if (s->parse_constant == NULL) + goto bail; + + return 0; + +bail: + Py_CLEAR(s->encoding); + Py_CLEAR(s->strict); + Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); + Py_CLEAR(s->parse_float); + Py_CLEAR(s->parse_int); + Py_CLEAR(s->parse_constant); + return -1; +} + +PyDoc_STRVAR(scanner_doc, "JSON scanner object"); + +static +PyTypeObject PyScannerType = { + PyVarObject_HEAD_INIT(NULL, 0) + "simplejson._speedups.Scanner", /* tp_name */ + sizeof(PyScannerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + scanner_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + scanner_call, /* tp_call */ + 0, /* tp_str */ + 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ + 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + scanner_doc, /* tp_doc */ + scanner_traverse, /* tp_traverse */ + scanner_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + scanner_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + scanner_init, /* tp_init */ + 0,/* PyType_GenericAlloc, */ /* tp_alloc */ + scanner_new, /* tp_new */ + 0,/* PyObject_GC_Del, */ /* tp_free */ +}; + +static PyObject * +encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyEncoderObject *s; + s = (PyEncoderObject *)type->tp_alloc(type, 0); + if (s != NULL) { + s->markers = NULL; + s->defaultfn = NULL; + s->encoder = NULL; + s->encoding = NULL; + s->indent = NULL; + s->key_separator = NULL; + s->item_separator = NULL; + s->key_memo = NULL; + s->sort_keys = NULL; + s->item_sort_key = NULL; + s->item_sort_kw = NULL; + s->Decimal = NULL; + s->max_long_size = NULL; + s->min_long_size = NULL; + } + return (PyObject *)s; +} + +static int +encoder_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* initialize Encoder object */ + static char *kwlist[] = { + "markers", + "default", + "encoder", + "indent", + "key_separator", + "item_separator", + "sort_keys", + "skipkeys", + "allow_nan", + "key_memo", + "use_decimal", + "namedtuple_as_object", + "tuple_as_array", + "int_as_string_bitcount", + "item_sort_key", + "encoding", + "for_json", + "ignore_nan", + "Decimal", + NULL}; + + PyEncoderObject *s; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; + PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; + PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json; + PyObject *ignore_nan, *Decimal; + + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, + &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, + &namedtuple_as_object, &tuple_as_array, + &int_as_string_bitcount, &item_sort_key, &encoding, &for_json, + &ignore_nan, &Decimal)) + return -1; + + Py_INCREF(markers); + s->markers = markers; + Py_INCREF(defaultfn); + s->defaultfn = defaultfn; + Py_INCREF(encoder); + s->encoder = encoder; + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + return -1; + Py_INCREF(indent); + s->indent = indent; + Py_INCREF(key_separator); + s->key_separator = key_separator; + Py_INCREF(item_separator); + s->item_separator = item_separator; + Py_INCREF(skipkeys); + s->skipkeys_bool = skipkeys; + s->skipkeys = PyObject_IsTrue(skipkeys); + Py_INCREF(key_memo); + s->key_memo = key_memo; + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_or_ignore_nan = ( + (PyObject_IsTrue(ignore_nan) ? JSON_IGNORE_NAN : 0) | + (PyObject_IsTrue(allow_nan) ? JSON_ALLOW_NAN : 0)); + s->use_decimal = PyObject_IsTrue(use_decimal); + s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); + s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) { + static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8; + int int_as_string_bitcount_val = PyLong_AsLong(int_as_string_bitcount); + if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < long_long_bitsize) { + s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << int_as_string_bitcount_val); + s->min_long_size = PyLong_FromLongLong(-1LL << int_as_string_bitcount_val); + if (s->min_long_size == NULL || s->max_long_size == NULL) { + return -1; + } + } + else { + PyErr_Format(PyExc_TypeError, + "int_as_string_bitcount (%d) must be greater than 0 and less than the number of bits of a `long long` type (%u bits)", + int_as_string_bitcount_val, long_long_bitsize); + return -1; + } + } + else if (int_as_string_bitcount == Py_None) { + Py_INCREF(Py_None); + s->max_long_size = Py_None; + Py_INCREF(Py_None); + s->min_long_size = Py_None; + } + else { + PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer"); + return -1; + } + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) { + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + return -1; + } + } + else if (PyObject_IsTrue(sort_keys)) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + return -1; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + return -1; + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + return -1; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + return -1; + } + Py_INCREF(sort_keys); + s->sort_keys = sort_keys; + Py_INCREF(item_sort_key); + s->item_sort_key = item_sort_key; + Py_INCREF(Decimal); + s->Decimal = Decimal; + s->for_json = PyObject_IsTrue(for_json); + + return 0; +} + +static PyObject * +encoder_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* Python callable interface to encode_listencode_obj */ + static char *kwlist[] = {"obj", "_current_indent_level", NULL}; + PyObject *obj; + Py_ssize_t indent_level; + PyEncoderObject *s; + JSON_Accu rval; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, + &obj, _convertPyInt_AsSsize_t, &indent_level)) + return NULL; + if (JSON_Accu_Init(&rval)) + return NULL; + if (encoder_listencode_obj(s, &rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); + return NULL; + } + return JSON_Accu_FinishAsList(&rval); +} + +static PyObject * +_encoded_const(PyObject *obj) +{ + /* Return the JSON string representation of None, True, False */ + if (obj == Py_None) { + static PyObject *s_null = NULL; + if (s_null == NULL) { + s_null = JSON_InternFromString("null"); + } + Py_INCREF(s_null); + return s_null; + } + else if (obj == Py_True) { + static PyObject *s_true = NULL; + if (s_true == NULL) { + s_true = JSON_InternFromString("true"); + } + Py_INCREF(s_true); + return s_true; + } + else if (obj == Py_False) { + static PyObject *s_false = NULL; + if (s_false == NULL) { + s_false = JSON_InternFromString("false"); + } + Py_INCREF(s_false); + return s_false; + } + else { + PyErr_SetString(PyExc_ValueError, "not a const"); + return NULL; + } +} + +static PyObject * +encoder_encode_float(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a PyFloat */ + double i = PyFloat_AS_DOUBLE(obj); + if (!Py_IS_FINITE(i)) { + if (!s->allow_or_ignore_nan) { + PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); + return NULL; + } + if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) { + return _encoded_const(Py_None); + } + /* JSON_ALLOW_NAN is set */ + else if (i > 0) { + static PyObject *sInfinity = NULL; + if (sInfinity == NULL) + sInfinity = JSON_InternFromString("Infinity"); + if (sInfinity) + Py_INCREF(sInfinity); + return sInfinity; + } + else if (i < 0) { + static PyObject *sNegInfinity = NULL; + if (sNegInfinity == NULL) + sNegInfinity = JSON_InternFromString("-Infinity"); + if (sNegInfinity) + Py_INCREF(sNegInfinity); + return sNegInfinity; + } + else { + static PyObject *sNaN = NULL; + if (sNaN == NULL) + sNaN = JSON_InternFromString("NaN"); + if (sNaN) + Py_INCREF(sNaN); + return sNaN; + } + } + /* Use a better float format here? */ + return PyObject_Repr(obj); +} + +static PyObject * +encoder_encode_string(PyEncoderObject *s, PyObject *obj) +{ + /* Return the JSON representation of a string */ + if (s->fast_encode) + return py_encode_basestring_ascii(NULL, obj); + else + return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); +} + +static int +_steal_accumulate(JSON_Accu *accu, PyObject *stolen) +{ + /* Append stolen and then decrement its reference count */ + int rval = JSON_Accu_Accumulate(accu, stolen); + Py_DECREF(stolen); + return rval; +} + +static int +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) +{ + /* Encode Python object obj to a JSON term, rval is a PyList */ + int rv = -1; + do { + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr != NULL) + rv = _steal_accumulate(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) { + encoded = maybe_quote_bigint(s, encoded, obj); + if (encoded == NULL) + break; + rv = _steal_accumulate(rval, encoded); + } + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else if (s->for_json && _has_for_json_hook(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "for_json", NULL); + if (newobj != NULL) { + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); + } + else if (s->namedtuple_as_object && _is_namedtuple(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "_asdict", NULL); + if (newobj != NULL) { + rv = encoder_listencode_dict(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); + } + else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + rv = encoder_listencode_list(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); + } + else if (PyDict_Check(obj)) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + rv = encoder_listencode_dict(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); + } + else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else { + PyObject *ident = NULL; + PyObject *newobj; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + break; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + break; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + break; + } + } + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + Py_LeaveRecursiveCall(); + break; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_LeaveRecursiveCall(); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + rv = -1; + } + else if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + rv = -1; + } + Py_XDECREF(ident); + } + } + } while (0); + return rv; +} + +static int +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) +{ + /* Encode Python dict dct a JSON term */ + static PyObject *open_dict = NULL; + static PyObject *close_dict = NULL; + static PyObject *empty_dict = NULL; + PyObject *kstr = NULL; + PyObject *ident = NULL; + PyObject *iter = NULL; + PyObject *item = NULL; + PyObject *items = NULL; + PyObject *encoded = NULL; + Py_ssize_t idx; + + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + return -1; + } + if (PyDict_Size(dct) == 0) + return JSON_Accu_Accumulate(rval, empty_dict); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(dct); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, dct)) { + goto bail; + } + } + + if (JSON_Accu_Accumulate(rval, open_dict)) + goto bail; + + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (_indent * _current_indent_level) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + + iter = encoder_dict_iteritems(s, dct); + if (iter == NULL) + goto bail; + + idx = 0; + while ((item = PyIter_Next(iter))) { + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + + encoded = PyDict_GetItem(s->key_memo, key); + if (encoded != NULL) { + Py_INCREF(encoded); + } else { + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(item); + Py_DECREF(kstr); + continue; + } + } + if (idx) { + if (JSON_Accu_Accumulate(rval, s->item_separator)) + goto bail; + } + if (encoded == NULL) { + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyDict_SetItem(s->key_memo, key, encoded)) + goto bail; + } + if (JSON_Accu_Accumulate(rval, encoded)) { + goto bail; + } + Py_CLEAR(encoded); + if (JSON_Accu_Accumulate(rval, s->key_separator)) + goto bail; + if (encoder_listencode_obj(s, rval, value, indent_level)) + goto bail; + Py_CLEAR(item); + idx += 1; + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (_indent * _current_indent_level) + */ + } + if (JSON_Accu_Accumulate(rval, close_dict)) + goto bail; + return 0; + +bail: + Py_XDECREF(encoded); + Py_XDECREF(items); + Py_XDECREF(iter); + Py_XDECREF(kstr); + Py_XDECREF(ident); + return -1; +} + + +static int +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) +{ + /* Encode Python list seq to a JSON term */ + static PyObject *open_array = NULL; + static PyObject *close_array = NULL; + static PyObject *empty_array = NULL; + PyObject *ident = NULL; + PyObject *iter = NULL; + PyObject *obj = NULL; + int is_true; + int i = 0; + + if (open_array == NULL || close_array == NULL || empty_array == NULL) { + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); + if (open_array == NULL || close_array == NULL || empty_array == NULL) + return -1; + } + ident = NULL; + is_true = PyObject_IsTrue(seq); + if (is_true == -1) + return -1; + else if (is_true == 0) + return JSON_Accu_Accumulate(rval, empty_array); + + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(seq); + if (ident == NULL) + goto bail; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + goto bail; + } + if (PyDict_SetItem(s->markers, ident, seq)) { + goto bail; + } + } + + iter = PyObject_GetIter(seq); + if (iter == NULL) + goto bail; + + if (JSON_Accu_Accumulate(rval, open_array)) + goto bail; + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level += 1; + /* + newline_indent = '\n' + (_indent * _current_indent_level) + separator = _item_separator + newline_indent + buf += newline_indent + */ + } + while ((obj = PyIter_Next(iter))) { + if (i) { + if (JSON_Accu_Accumulate(rval, s->item_separator)) + goto bail; + } + if (encoder_listencode_obj(s, rval, obj, indent_level)) + goto bail; + i++; + Py_CLEAR(obj); + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) + goto bail; + Py_CLEAR(ident); + } + if (s->indent != Py_None) { + /* TODO: DOES NOT RUN */ + indent_level -= 1; + /* + yield '\n' + (_indent * _current_indent_level) + */ + } + if (JSON_Accu_Accumulate(rval, close_array)) + goto bail; + return 0; + +bail: + Py_XDECREF(obj); + Py_XDECREF(iter); + Py_XDECREF(ident); + return -1; +} + +static void +encoder_dealloc(PyObject *self) +{ + /* Deallocate Encoder */ + encoder_clear(self); + Py_TYPE(self)->tp_free(self); +} + +static int +encoder_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_VISIT(s->markers); + Py_VISIT(s->defaultfn); + Py_VISIT(s->encoder); + Py_VISIT(s->encoding); + Py_VISIT(s->indent); + Py_VISIT(s->key_separator); + Py_VISIT(s->item_separator); + Py_VISIT(s->key_memo); + Py_VISIT(s->sort_keys); + Py_VISIT(s->item_sort_kw); + Py_VISIT(s->item_sort_key); + Py_VISIT(s->max_long_size); + Py_VISIT(s->min_long_size); + Py_VISIT(s->Decimal); + return 0; +} + +static int +encoder_clear(PyObject *self) +{ + /* Deallocate Encoder */ + PyEncoderObject *s; + assert(PyEncoder_Check(self)); + s = (PyEncoderObject *)self; + Py_CLEAR(s->markers); + Py_CLEAR(s->defaultfn); + Py_CLEAR(s->encoder); + Py_CLEAR(s->encoding); + Py_CLEAR(s->indent); + Py_CLEAR(s->key_separator); + Py_CLEAR(s->item_separator); + Py_CLEAR(s->key_memo); + Py_CLEAR(s->skipkeys_bool); + Py_CLEAR(s->sort_keys); + Py_CLEAR(s->item_sort_kw); + Py_CLEAR(s->item_sort_key); + Py_CLEAR(s->max_long_size); + Py_CLEAR(s->min_long_size); + Py_CLEAR(s->Decimal); + return 0; +} + +PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); + +static +PyTypeObject PyEncoderType = { + PyVarObject_HEAD_INIT(NULL, 0) + "simplejson._speedups.Encoder", /* tp_name */ + sizeof(PyEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + encoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + encoder_call, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + encoder_doc, /* tp_doc */ + encoder_traverse, /* tp_traverse */ + encoder_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + encoder_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + encoder_init, /* tp_init */ + 0, /* tp_alloc */ + encoder_new, /* tp_new */ + 0, /* tp_free */ +}; + +static PyMethodDef speedups_methods[] = { + {"encode_basestring_ascii", + (PyCFunction)py_encode_basestring_ascii, + METH_O, + pydoc_encode_basestring_ascii}, + {"scanstring", + (PyCFunction)py_scanstring, + METH_VARARGS, + pydoc_scanstring}, + {NULL, NULL, 0, NULL} +}; + +PyDoc_STRVAR(module_doc, +"simplejson speedups\n"); + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +static PyObject * +moduleinit(void) +{ + PyObject *m; + PyScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyScannerType) < 0) + return NULL; + PyEncoderType.tp_new = PyType_GenericNew; + if (PyType_Ready(&PyEncoderType) < 0) + return NULL; + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif + Py_INCREF((PyObject*)&PyScannerType); + PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); + Py_INCREF((PyObject*)&PyEncoderType); + PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + return m; +} + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit__speedups(void) +{ + return moduleinit(); +} +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif |