diff options
author | Walter Purcaro <vuolter@gmail.com> | 2014-06-28 16:20:53 +0200 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2014-06-28 20:23:56 +0200 |
commit | 857cb9714f9367849bd1dc7498239ea3fb444227 (patch) | |
tree | 50ecd03d5dc542ec8ef5cdbcfd8aa6fd904d4416 /module/lib | |
parent | [Lib] Update BeautifulSoup.py to version 3.2.1 (diff) | |
download | pyload-857cb9714f9367849bd1dc7498239ea3fb444227.tar.xz |
[Lib] Update simplejson to version 3.5.3
Diffstat (limited to 'module/lib')
-rw-r--r-- | module/lib/simplejson/__init__.py | 200 | ||||
-rw-r--r-- | module/lib/simplejson/compat.py | 46 | ||||
-rw-r--r-- | module/lib/simplejson/decoder.py | 178 | ||||
-rw-r--r-- | module/lib/simplejson/encoder.py | 346 | ||||
-rw-r--r-- | module/lib/simplejson/scanner.py | 62 | ||||
-rw-r--r-- | module/lib/simplejson/tool.py | 25 |
6 files changed, 571 insertions, 286 deletions
diff --git a/module/lib/simplejson/__init__.py b/module/lib/simplejson/__init__.py index ef5c0db48..a5c01379a 100644 --- a/module/lib/simplejson/__init__.py +++ b/module/lib/simplejson/__init__.py @@ -14,15 +14,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +31,14 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) { "4": 5, "6": 7 @@ -52,7 +52,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -95,33 +95,35 @@ Using simplejson.tool from the shell to validate and pretty-print:: "json": "obj" } $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) + Expecting property name: line 1 column 3 (char 2) """ -__version__ = '2.2.1' +from __future__ import absolute_import +__version__ = '3.5.3' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', - 'OrderedDict', + 'OrderedDict', 'simple_first', ] __author__ = 'Bob Ippolito <bob@redivi.com>' from decimal import Decimal -from decoder import JSONDecoder, JSONDecodeError -from encoder import JSONEncoder +from .scanner import JSONDecodeError +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML def _import_OrderedDict(): import collections try: return collections.OrderedDict except AttributeError: - import ordered_dict + from . import ordered_dict return ordered_dict.OrderedDict OrderedDict = _import_OrderedDict() def _import_c_make_encoder(): try: - from simplejson._speedups import make_encoder + from ._speedups import make_encoder return make_encoder except ImportError: return None @@ -138,34 +140,41 @@ _default_encoder = JSONEncoder( use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=True, - namedtuple_as_object=True, tuple_as_array=True, - **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). - If ``skipkeys`` is true then ``dict`` keys that are not basic types + If *skipkeys* is true then ``dict`` keys that are not basic types (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + If *ensure_ascii* is false, then the some chunks written to ``fp`` may be ``unicode`` instances, subject to normal Python ``str`` to ``unicode`` coercion rules. Unless ``fp.write()`` explicitly understands ``unicode`` (as in ``codecs.getwriter()``) this is likely to cause an error. - If ``check_circular`` is false, then the circular reference check + If *check_circular* is false, then the circular reference check for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). - If ``allow_nan`` is false, then it will be a ``ValueError`` to + If *allow_nan* is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. If *indent* is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated @@ -174,14 +183,16 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, versions of simplejson earlier than 2.1.0, an integer is also accepted and is converted to a string with that many spaces. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + *encoding* is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. If *use_decimal* is true (default: ``True``) then decimal.Decimal will be natively serialized to JSON with full precision. @@ -189,13 +200,41 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, If *namedtuple_as_object* is true (default: ``True``), :class:`tuple` subclasses with ``_asdict()`` methods will be encoded as JSON objects. - + If *tuple_as_array* is true (default: ``True``), :class:`tuple` (and subclasses) will be encoded as JSON arrays. + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. """ # cached encoder @@ -203,7 +242,9 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal - and namedtuple_as_object and tuple_as_array and not kw): + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and int_as_string_bitcount is None + and not item_sort_key and not for_json and not ignore_nan and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -214,6 +255,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, default=default, use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost @@ -222,11 +269,11 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=True, - namedtuple_as_object=True, - tuple_as_array=True, - **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -253,9 +300,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, versions of simplejson earlier than 2.1.0, an integer is also accepted and is converted to a string with that many spaces. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. ``encoding`` is the character encoding for str instances, default is UTF-8. @@ -268,21 +317,52 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, If *namedtuple_as_object* is true (default: ``True``), :class:`tuple` subclasses with ``_asdict()`` methods will be encoded as JSON objects. - + If *tuple_as_array* is true (default: ``True``), :class:`tuple` (and subclasses) will be encoded as JSON arrays. + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precendence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. """ # cached encoder - if (not skipkeys and ensure_ascii and + if ( + not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal - and namedtuple_as_object and tuple_as_array and not kw): + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and int_as_string_bitcount is None + and not sort_keys and not item_sort_key and not for_json + and not ignore_nan and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -293,6 +373,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) @@ -347,7 +433,8 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ return loads(fp.read(), @@ -403,7 +490,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ if (cls is None and encoding is None and object_hook is None and @@ -431,14 +519,14 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, def _toggle_speedups(enabled): - import simplejson.decoder as dec - import simplejson.encoder as enc - import simplejson.scanner as scan + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan c_make_encoder = _import_c_make_encoder() if enabled: dec.scanstring = dec.c_scanstring or dec.py_scanstring enc.c_make_encoder = c_make_encoder - enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or enc.py_encode_basestring_ascii) scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner else: @@ -464,3 +552,9 @@ def _toggle_speedups(enabled): encoding='utf-8', default=None, ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. + """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/module/lib/simplejson/compat.py b/module/lib/simplejson/compat.py new file mode 100644 index 000000000..a0af4a1cb --- /dev/null +++ b/module/lib/simplejson/compat.py @@ -0,0 +1,46 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/module/lib/simplejson/decoder.py b/module/lib/simplejson/decoder.py index e5496d6e7..1a6c5d938 100644 --- a/module/lib/simplejson/decoder.py +++ b/module/lib/simplejson/decoder.py @@ -1,24 +1,28 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner, JSONDecodeError -from simplejson.scanner import make_scanner def _import_c_scanstring(): try: - from simplejson._speedups import scanstring + from ._speedups import scanstring return scanstring except ImportError: return None c_scanstring = _import_c_scanstring() +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + _BYTES = fromhex('7FF80000000000007FF0000000000000') # The struct module in Python 2.4 would get frexp() out of range here # when an endian is specified in the format string. Fixed in Python 2.5+ if sys.byteorder != 'big': @@ -28,57 +32,6 @@ def _floatconstants(): NaN, PosInf, NegInf = _floatconstants() - -class JSONDecodeError(ValueError): - """Subclass of ValueError with the following additional properties: - - msg: The unformatted error message - doc: The JSON document being parsed - pos: The start index of doc where parsing failed - end: The end index of doc where parsing failed (may be None) - lineno: The line corresponding to pos - colno: The column corresponding to pos - endlineno: The line corresponding to end (may be None) - endcolno: The column corresponding to end (may be None) - - """ - def __init__(self, msg, doc, pos, end=None): - ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) - self.msg = msg - self.doc = doc - self.pos = pos - self.end = end - self.lineno, self.colno = linecol(doc, pos) - if end is not None: - self.endlineno, self.endcolno = linecol(doc, end) - else: - self.endlineno, self.endcolno = None, None - - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -87,14 +40,15 @@ _CONSTANTS = { STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), } DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match): + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError @@ -117,8 +71,8 @@ def py_scanstring(s, end, encoding=None, strict=True, content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) + if not _PY3 and not isinstance(content, text_type): + content = text_type(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -126,8 +80,7 @@ def py_scanstring(s, end, encoding=None, strict=True, break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) + msg = "Invalid control character %r at" raise JSONDecodeError(msg, s, end) else: _append(terminator) @@ -142,33 +95,42 @@ def py_scanstring(s, end, encoding=None, strict=True, try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) + msg = "Invalid \\X escape sequence %r" raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise JSONDecodeError(msg, s, end) - uni = int(esc, 16) + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise JSONDecodeError(msg, s, end) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise JSONDecodeError(msg, s, end) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 char = unichr(uni) - end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -177,9 +139,10 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, +def JSONObject(state, encoding, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state # Backwards compatibility if memo is None: memo = {} @@ -203,7 +166,9 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise JSONDecodeError("Expecting property name", s, end) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) @@ -214,7 +179,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise JSONDecodeError("Expecting : delimiter", s, end) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 @@ -226,10 +191,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) + value, end = scan_once(s, end) pairs.append((key, value)) try: @@ -244,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, if nextchar == '}': break elif nextchar != ',': - raise JSONDecodeError("Expecting , delimiter", s, end - 1) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -259,7 +221,9 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, end += 1 if nextchar != '"': - raise JSONDecodeError("Expecting property name", s, end - 1) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) if object_pairs_hook is not None: result = object_pairs_hook(pairs) @@ -269,7 +233,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -278,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) + value, end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -293,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise JSONDecodeError("Expecting , delimiter", s, end) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -317,7 +281,7 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | unicode | + | string | str, unicode | +---------------+-------------------+ | number (int) | int, long | +---------------+-------------------+ @@ -381,6 +345,8 @@ class JSONDecoder(object): ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook @@ -394,28 +360,34 @@ class JSONDecoder(object): self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + if _PY3 and isinstance(s, binary_type): + s = s.decode(self.encoding) + obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0): + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. This can be used to decode a JSON document from a string that may have extraneous data at the end. """ - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise JSONDecodeError("No JSON object could be decoded", s, idx) - return obj, end + if idx < 0: + # Ensure that raw_decode bails on negative indexes, the regex + # would otherwise mask this behavior. #98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/module/lib/simplejson/encoder.py b/module/lib/simplejson/encoder.py index 5ec7440f1..db18244ec 100644 --- a/module/lib/simplejson/encoder.py +++ b/module/lib/simplejson/encoder.py @@ -1,11 +1,13 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re +from operator import itemgetter from decimal import Decimal - +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 def _import_speedups(): try: - from simplejson import _speedups + from . import _speedups return _speedups.encode_basestring_ascii, _speedups.make_encoder except ImportError: return None, None @@ -13,7 +15,10 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from simplejson.decoder import PosInf -ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +# This is required because u() will mangle the string and ur'' isn't valid +# python3 syntax +ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -24,32 +29,40 @@ ESCAPE_DCT = { '\n': '\\n', '\r': '\\r', '\t': '\\t', - u'\u2028': '\\u2028', - u'\u2029': '\\u2029', } for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u('"')): """Return a JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] - return u'"' + ESCAPE.sub(replace, s) + u'"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -103,11 +116,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None, - use_decimal=True, namedtuple_as_object=True, - tuple_as_array=True): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -139,9 +155,10 @@ class JSONEncoder(object): versions of simplejson earlier than 2.1.0, an integer is also accepted and is converted to a string with that many spaces. - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable @@ -155,11 +172,33 @@ class JSONEncoder(object): be supported directly by the encoder. For the inverse, decode JSON with ``parse_float=decimal.Decimal``. - If namedtuple_as_object is true (the default), tuple subclasses with + If namedtuple_as_object is true (the default), objects with ``_asdict()`` methods will be encoded as JSON objects. - + If tuple_as_array is true (the default), tuple (and subclasses) will be encoded as JSON arrays. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. + """ self.skipkeys = skipkeys @@ -170,8 +209,13 @@ class JSONEncoder(object): self.use_decimal = use_decimal self.namedtuple_as_object = namedtuple_as_object self.tuple_as_array = tuple_as_array - if isinstance(indent, (int, long)): - indent = ' ' * indent + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators @@ -210,12 +254,11 @@ class JSONEncoder(object): """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -251,11 +294,11 @@ class JSONEncoder(object): _encoder = encode_basestring if self.encoding != 'utf-8': def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): + if isinstance(o, binary_type): o = o.decode(_encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): # Check for specials. Note that this type of test is processor # and/or platform-specific, so do tests which don't depend on @@ -270,28 +313,37 @@ class JSONEncoder(object): else: return _repr(o) - if not allow_nan: + if ignore_nan: + text = 'null' + elif not allow_nan: raise ValueError( "Out of range float values are not JSON compliant: " + repr(o)) return text - key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) if (_one_shot and c_make_encoder is not None and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, self.allow_nan, key_memo, self.use_decimal, - self.namedtuple_as_object, self.tuple_as_array) + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, Decimal) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, _one_shot, self.use_decimal, - self.namedtuple_as_object, self.tuple_as_array) + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + Decimal=Decimal) try: return _iterencode(o, 0) finally: @@ -328,22 +380,46 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _use_decimal, _namedtuple_as_object, _tuple_as_array, + _int_as_string_bitcount, _item_sort_key, + _encoding,_for_json, ## HACK: hand-optimized bytecode; turn globals into locals - False=False, - True=True, + _PY3=PY3, ValueError=ValueError, - basestring=basestring, + string_types=string_types, Decimal=Decimal, dict=dict, float=float, id=id, - int=int, + integer_types=integer_types, isinstance=isinstance, list=list, - long=long, str=str, tuple=tuple, ): + if _item_sort_key and not callable(_item_sort_key): + raise TypeError("item_sort_key must be None or callable") + elif _sort_keys and not _item_sort_key: + _item_sort_key = itemgetter(0) + + if (_int_as_string_bitcount is not None and + (_int_as_string_bitcount <= 0 or + not isinstance(_int_as_string_bitcount, integer_types))): + raise TypeError("int_as_string_bitcount must be a positive integer") + + def _encode_int(value): + skip_quoting = ( + _int_as_string_bitcount is None + or + _int_as_string_bitcount < 1 + ) + if ( + skip_quoting or + (-1 << _int_as_string_bitcount) + < value < + (1 << _int_as_string_bitcount) + ): + return str(value) + return '"' + str(value) + '"' def _iterencode_list(lst, _current_indent_level): if not lst: @@ -369,7 +445,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, first = False else: buf = separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield buf + _encoder(value) elif value is None: yield buf + 'null' @@ -377,26 +454,30 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield buf + 'true' elif value is False: yield buf + 'false' - elif isinstance(value, (int, long)): - yield buf + str(value) + elif isinstance(value, integer_types): + yield buf + _encode_int(value) elif isinstance(value, float): yield buf + _floatstr(value) elif _use_decimal and isinstance(value, Decimal): yield buf + str(value) else: yield buf - if isinstance(value, list): + for_json = _for_json and getattr(value, 'for_json', None) + if for_json and callable(for_json): + chunks = _iterencode(for_json(), _current_indent_level) + elif isinstance(value, list): chunks = _iterencode_list(value, _current_indent_level) - elif (_namedtuple_as_object and isinstance(value, tuple) and - hasattr(value, '_asdict')): - chunks = _iterencode_dict(value._asdict(), - _current_indent_level) - elif _tuple_as_array and isinstance(value, tuple): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) else: - chunks = _iterencode(value, _current_indent_level) + _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) + if _asdict and callable(_asdict): + chunks = _iterencode_dict(_asdict(), + _current_indent_level) + elif _tuple_as_array and isinstance(value, tuple): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) for chunk in chunks: yield chunk if newline_indent is not None: @@ -406,6 +487,29 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, if markers is not None: del markers[markerid] + def _stringify_key(key): + if isinstance(key, string_types): # pragma: no cover + pass + elif isinstance(key, binary_type): + key = key.decode(_encoding) + elif isinstance(key, float): + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, integer_types): + key = str(key) + elif _use_decimal and isinstance(key, Decimal): + key = str(key) + elif _skipkeys: + key = None + else: + raise TypeError("key " + repr(key) + " is not a string") + return key + def _iterencode_dict(dct, _current_indent_level): if not dct: yield '{}' @@ -425,37 +529,35 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, newline_indent = None item_separator = _item_separator first = True - if _sort_keys: - items = dct.items() - items.sort(key=lambda kv: kv[0]) + if _PY3: + iteritems = dct.items() else: - items = dct.iteritems() + iteritems = dct.iteritems() + if _item_sort_key: + items = [] + for k, v in dct.items(): + if not isinstance(k, string_types): + k = _stringify_key(k) + if k is None: + continue + items.append((k, v)) + items.sort(key=_item_sort_key) + else: + items = iteritems for key, value in items: - if isinstance(key, basestring): - pass - # JavaScript is weakly typed for these, so it makes sense to - # also allow them. Many encoders seem to do something like this. - elif isinstance(key, float): - key = _floatstr(key) - elif key is True: - key = 'true' - elif key is False: - key = 'false' - elif key is None: - key = 'null' - elif isinstance(key, (int, long)): - key = str(key) - elif _skipkeys: - continue - else: - raise TypeError("key " + repr(key) + " is not a string") + if not (_item_sort_key or isinstance(key, string_types)): + key = _stringify_key(key) + if key is None: + # _skipkeys must be True + continue if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, basestring): + if (isinstance(value, string_types) or + (_PY3 and isinstance(value, binary_type))): yield _encoder(value) elif value is None: yield 'null' @@ -463,25 +565,29 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif value is False: yield 'false' - elif isinstance(value, (int, long)): - yield str(value) + elif isinstance(value, integer_types): + yield _encode_int(value) elif isinstance(value, float): yield _floatstr(value) elif _use_decimal and isinstance(value, Decimal): yield str(value) else: - if isinstance(value, list): + for_json = _for_json and getattr(value, 'for_json', None) + if for_json and callable(for_json): + chunks = _iterencode(for_json(), _current_indent_level) + elif isinstance(value, list): chunks = _iterencode_list(value, _current_indent_level) - elif (_namedtuple_as_object and isinstance(value, tuple) and - hasattr(value, '_asdict')): - chunks = _iterencode_dict(value._asdict(), - _current_indent_level) - elif _tuple_as_array and isinstance(value, tuple): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) else: - chunks = _iterencode(value, _current_indent_level) + _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) + if _asdict and callable(_asdict): + chunks = _iterencode_dict(_asdict(), + _current_indent_level) + elif _tuple_as_array and isinstance(value, tuple): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) for chunk in chunks: yield chunk if newline_indent is not None: @@ -492,7 +598,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, del markers[markerid] def _iterencode(o, _current_indent_level): - if isinstance(o, basestring): + if (isinstance(o, string_types) or + (_PY3 and isinstance(o, binary_type))): yield _encoder(o) elif o is None: yield 'null' @@ -500,35 +607,42 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield 'true' elif o is False: yield 'false' - elif isinstance(o, (int, long)): - yield str(o) + elif isinstance(o, integer_types): + yield _encode_int(o) elif isinstance(o, float): yield _floatstr(o) - elif isinstance(o, list): - for chunk in _iterencode_list(o, _current_indent_level): - yield chunk - elif (_namedtuple_as_object and isinstance(o, tuple) and - hasattr(o, '_asdict')): - for chunk in _iterencode_dict(o._asdict(), _current_indent_level): - yield chunk - elif (_tuple_as_array and isinstance(o, tuple)): - for chunk in _iterencode_list(o, _current_indent_level): - yield chunk - elif isinstance(o, dict): - for chunk in _iterencode_dict(o, _current_indent_level): - yield chunk - elif _use_decimal and isinstance(o, Decimal): - yield str(o) else: - if markers is not None: - markerid = id(o) - if markerid in markers: - raise ValueError("Circular reference detected") - markers[markerid] = o - o = _default(o) - for chunk in _iterencode(o, _current_indent_level): - yield chunk - if markers is not None: - del markers[markerid] + for_json = _for_json and getattr(o, 'for_json', None) + if for_json and callable(for_json): + for chunk in _iterencode(for_json(), _current_indent_level): + yield chunk + elif isinstance(o, list): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + else: + _asdict = _namedtuple_as_object and getattr(o, '_asdict', None) + if _asdict and callable(_asdict): + for chunk in _iterencode_dict(_asdict(), + _current_indent_level): + yield chunk + elif (_tuple_as_array and isinstance(o, tuple)): + for chunk in _iterencode_list(o, _current_indent_level): + yield chunk + elif isinstance(o, dict): + for chunk in _iterencode_dict(o, _current_indent_level): + yield chunk + elif _use_decimal and isinstance(o, Decimal): + yield str(o) + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): + yield chunk + if markers is not None: + del markers[markerid] return _iterencode diff --git a/module/lib/simplejson/scanner.py b/module/lib/simplejson/scanner.py index 54593a371..5abed357b 100644 --- a/module/lib/simplejson/scanner.py +++ b/module/lib/simplejson/scanner.py @@ -9,12 +9,62 @@ def _import_c_make_scanner(): return None c_make_scanner = _import_c_make_scanner() -__all__ = ['make_scanner'] +__all__ = ['make_scanner', 'JSONDecodeError'] NUMBER_RE = re.compile( r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', (re.VERBOSE | re.MULTILINE | re.DOTALL)) +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc where parsing failed + end: The end index of doc where parsing failed (may be None) + lineno: The line corresponding to pos + colno: The column corresponding to pos + endlineno: The line corresponding to end (may be None) + endcolno: The column corresponding to end (may be None) + + """ + # Note that this exception is used from _speedups + def __init__(self, msg, doc, pos, end=None): + ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) + self.msg = msg + self.doc = doc + self.pos = pos + self.end = end + self.lineno, self.colno = linecol(doc, pos) + if end is not None: + self.endlineno, self.endcolno = linecol(doc, end) + else: + self.endlineno, self.endcolno = None, None + + def __reduce__(self): + return self.__class__, (self.msg, self.doc, self.pos, self.end) + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + 1 + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + msg = msg.replace('%r', repr(doc[pos:pos + 1])) + if end is None: + fmt = '%s: line %d column %d (char %d)' + return fmt % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' + return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + + def py_make_scanner(context): parse_object = context.parse_object parse_array = context.parse_array @@ -30,10 +80,11 @@ def py_make_scanner(context): memo = context.memo def _scan_once(string, idx): + errmsg = 'Expecting value' try: nextchar = string[idx] except IndexError: - raise StopIteration + raise JSONDecodeError(errmsg, string, idx) if nextchar == '"': return parse_string(string, idx + 1, encoding, strict) @@ -64,9 +115,14 @@ def py_make_scanner(context): elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: - raise StopIteration + raise JSONDecodeError(errmsg, string, idx) def scan_once(string, idx): + if idx < 0: + # Ensure the same behavior as the C speedup, otherwise + # this would work for *some* negative string indices due + # to the behavior of __getitem__ for strings. #98 + raise JSONDecodeError('Expecting value', string, idx) try: return _scan_once(string, idx) finally: diff --git a/module/lib/simplejson/tool.py b/module/lib/simplejson/tool.py index 73370db55..062e8e2c1 100644 --- a/module/lib/simplejson/tool.py +++ b/module/lib/simplejson/tool.py @@ -10,6 +10,7 @@ Usage:: Expecting property name: line 1 column 2 (char 2) """ +from __future__ import with_statement import sys import simplejson as json @@ -18,21 +19,23 @@ def main(): infile = sys.stdin outfile = sys.stdout elif len(sys.argv) == 2: - infile = open(sys.argv[1], 'rb') + infile = open(sys.argv[1], 'r') outfile = sys.stdout elif len(sys.argv) == 3: - infile = open(sys.argv[1], 'rb') - outfile = open(sys.argv[2], 'wb') + infile = open(sys.argv[1], 'r') + outfile = open(sys.argv[2], 'w') else: raise SystemExit(sys.argv[0] + " [infile [outfile]]") - try: - obj = json.load(infile, - object_pairs_hook=json.OrderedDict, - use_decimal=True) - except ValueError, e: - raise SystemExit(e) - json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) - outfile.write('\n') + with infile: + try: + obj = json.load(infile, + object_pairs_hook=json.OrderedDict, + use_decimal=True) + except ValueError: + raise SystemExit(sys.exc_info()[1]) + with outfile: + json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) + outfile.write('\n') if __name__ == '__main__': |