update simplejson to v3.6.5

This commit is contained in:
Alan Hamlett 2015-03-23 14:00:40 -07:00
parent 4fca5e1c06
commit 7fb47228f9
14 changed files with 525 additions and 268 deletions

View file

@ -98,7 +98,7 @@ Using simplejson.tool from the shell to validate and pretty-print::
Expecting property name: line 1 column 3 (char 2) Expecting property name: line 1 column 3 (char 2)
""" """
from __future__ import absolute_import from __future__ import absolute_import
__version__ = '3.3.0' __version__ = '3.6.5'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@ -144,14 +144,15 @@ _default_encoder = JSONEncoder(
item_sort_key=None, item_sort_key=None,
for_json=False, for_json=False,
ignore_nan=False, ignore_nan=False,
int_as_string_bitcount=None,
) )
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True, encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True, namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None, bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, **kw): for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object). ``.write()``-supporting file-like object).
@ -209,6 +210,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
lossy operation that will not round-trip correctly and should be used lossy operation that will not round-trip correctly and should be used
sparingly. sparingly.
If *int_as_string_bitcount* is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, *item_sort_key* is a callable used to sort the items in If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precedence over in alphabetical order by key. This option takes precedence over
@ -238,8 +243,11 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not item_sort_key and not bigint_as_string and not sort_keys
and not for_json and not ignore_nan and not kw): and not item_sort_key and not for_json
and not ignore_nan and int_as_string_bitcount is None
and not kw
):
iterable = _default_encoder.iterencode(obj) iterable = _default_encoder.iterencode(obj)
else: else:
if cls is None: if cls is None:
@ -255,6 +263,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
item_sort_key=item_sort_key, item_sort_key=item_sort_key,
for_json=for_json, for_json=for_json,
ignore_nan=ignore_nan, ignore_nan=ignore_nan,
int_as_string_bitcount=int_as_string_bitcount,
**kw).iterencode(obj) **kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at # could accelerate with writelines in some versions of Python, at
# a debuggability cost # a debuggability cost
@ -263,11 +272,11 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True, encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True, namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None, bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, **kw): for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``. """Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is false then ``dict`` keys that are not basic types If ``skipkeys`` is false then ``dict`` keys that are not basic types
@ -319,6 +328,10 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
or lower than -2**53 will be encoded as strings. This is to avoid the or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise. rounding that happens in Javascript otherwise.
If *int_as_string_bitcount* is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, *item_sort_key* is a callable used to sort the items in If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precendence over in alphabetical order by key. This option takes precendence over
@ -343,14 +356,17 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
""" """
# cached encoder # cached encoder
if (not skipkeys and ensure_ascii and if (
not skipkeys and ensure_ascii and
check_circular and allow_nan and check_circular and allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not sort_keys and not bigint_as_string and not sort_keys
and not item_sort_key and not for_json and not item_sort_key and not for_json
and not ignore_nan and not kw): and not ignore_nan and int_as_string_bitcount is None
and not kw
):
return _default_encoder.encode(obj) return _default_encoder.encode(obj)
if cls is None: if cls is None:
cls = JSONEncoder cls = JSONEncoder
@ -366,6 +382,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
item_sort_key=item_sort_key, item_sort_key=item_sort_key,
for_json=for_json, for_json=for_json,
ignore_nan=ignore_nan, ignore_nan=ignore_nan,
int_as_string_bitcount=int_as_string_bitcount,
**kw).encode(obj) **kw).encode(obj)

View file

@ -168,7 +168,8 @@ typedef struct _PyEncoderObject {
int use_decimal; int use_decimal;
int namedtuple_as_object; int namedtuple_as_object;
int tuple_as_array; int tuple_as_array;
int bigint_as_string; PyObject *max_long_size;
PyObject *min_long_size;
PyObject *item_sort_key; PyObject *item_sort_key;
PyObject *item_sort_kw; PyObject *item_sort_kw;
int for_json; int for_json;
@ -187,6 +188,8 @@ static PyMemberDef encoder_members[] = {
{"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"}, {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
{"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
{"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
{"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"},
{"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"},
{NULL} {NULL}
}; };
@ -197,7 +200,7 @@ JSON_ParseEncoding(PyObject *encoding);
static PyObject * static PyObject *
JSON_UnicodeFromChar(JSON_UNICHR c); JSON_UnicodeFromChar(JSON_UNICHR c);
static PyObject * static PyObject *
maybe_quote_bigint(PyObject *encoded, PyObject *obj); maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj);
static Py_ssize_t static Py_ssize_t
ascii_char_size(JSON_UNICHR c); ascii_char_size(JSON_UNICHR c);
static Py_ssize_t static Py_ssize_t
@ -384,35 +387,22 @@ JSON_UnicodeFromChar(JSON_UNICHR c)
} }
static PyObject * static PyObject *
maybe_quote_bigint(PyObject *encoded, PyObject *obj) maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj)
{ {
static PyObject *big_long = NULL; if (s->max_long_size != Py_None && s->min_long_size != Py_None) {
static PyObject *small_long = NULL; if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) ||
if (big_long == NULL) { PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) {
big_long = PyLong_FromLongLong(1LL << 53);
if (big_long == NULL) {
Py_DECREF(encoded);
return NULL;
}
}
if (small_long == NULL) {
small_long = PyLong_FromLongLong(-1LL << 53);
if (small_long == NULL) {
Py_DECREF(encoded);
return NULL;
}
}
if (PyObject_RichCompareBool(obj, big_long, Py_GE) ||
PyObject_RichCompareBool(obj, small_long, Py_LE)) {
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded);
#else #else
PyObject* quoted = PyString_FromFormat("\"%s\"", PyObject* quoted = PyString_FromFormat("\"%s\"",
PyString_AsString(encoded)); PyString_AsString(encoded));
#endif #endif
Py_DECREF(encoded); Py_DECREF(encoded);
encoded = quoted; encoded = quoted;
}
} }
return encoded; return encoded;
} }
@ -1023,13 +1013,13 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
/* Surrogate pair */ /* Surrogate pair */
if ((c & 0xfc00) == 0xd800) { if ((c & 0xfc00) == 0xd800) {
if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') {
JSON_UNICHR c2 = 0; JSON_UNICHR c2 = 0;
end += 6; end += 6;
/* Decode 4 hex digits */ /* Decode 4 hex digits */
for (next += 2; next < end; next++) { for (next += 2; next < end; next++) {
c2 <<= 4; c2 <<= 4;
JSON_UNICHR digit = buf[next]; JSON_UNICHR digit = buf[next];
switch (digit) { switch (digit) {
case '0': case '1': case '2': case '3': case '4': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0'); break; c2 |= (digit - '0'); break;
@ -1042,18 +1032,18 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s
default: default:
raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail; goto bail;
} }
} }
if ((c2 & 0xfc00) != 0xdc00) { if ((c2 & 0xfc00) != 0xdc00) {
/* not a low surrogate, rewind */ /* not a low surrogate, rewind */
end -= 6; end -= 6;
next = end; next = end;
} }
else { else {
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
} }
} }
} }
#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ #endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */
} }
if (c > 0x7f) { if (c > 0x7f) {
@ -1224,15 +1214,15 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
/* Surrogate pair */ /* Surrogate pair */
if ((c & 0xfc00) == 0xd800) { if ((c & 0xfc00) == 0xd800) {
JSON_UNICHR c2 = 0; JSON_UNICHR c2 = 0;
if (end + 6 < len && if (end + 6 < len &&
PyUnicode_READ(kind, buf, next) == '\\' && PyUnicode_READ(kind, buf, next) == '\\' &&
PyUnicode_READ(kind, buf, next + 1) == 'u') { PyUnicode_READ(kind, buf, next + 1) == 'u') {
end += 6; end += 6;
/* Decode 4 hex digits */ /* Decode 4 hex digits */
for (next += 2; next < end; next++) { for (next += 2; next < end; next++) {
JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
c2 <<= 4; c2 <<= 4;
switch (digit) { switch (digit) {
case '0': case '1': case '2': case '3': case '4': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0'); break; c2 |= (digit - '0'); break;
@ -1245,18 +1235,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
default: default:
raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
goto bail; goto bail;
} }
} }
if ((c2 & 0xfc00) != 0xdc00) { if ((c2 & 0xfc00) != 0xdc00) {
/* not a low surrogate, rewind */ /* not a low surrogate, rewind */
end -= 6; end -= 6;
next = end; next = end;
} }
else { else {
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
} }
} }
} }
#endif #endif
} }
APPEND_OLD_CHUNK APPEND_OLD_CHUNK
@ -1443,10 +1433,10 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* only loop if the object is non-empty */ /* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') { if (idx <= end_idx && str[idx] != '}') {
int trailing_delimiter = 0; int trailing_delimiter = 0;
while (idx <= end_idx) { while (idx <= end_idx) {
PyObject *memokey; PyObject *memokey;
trailing_delimiter = 0; trailing_delimiter = 0;
/* read key */ /* read key */
if (str[idx] != '"') { if (str[idx] != '"') {
@ -1506,7 +1496,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
/* bail if the object is closed or we didn't get the , delimiter */ /* bail if the object is closed or we didn't get the , delimiter */
did_parse = 1; did_parse = 1;
if (idx > end_idx) break; if (idx > end_idx) break;
if (str[idx] == '}') { if (str[idx] == '}') {
break; break;
@ -1519,20 +1509,20 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
/* skip whitespace after , delimiter */ /* skip whitespace after , delimiter */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
trailing_delimiter = 1; trailing_delimiter = 1;
}
if (trailing_delimiter) {
raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
} }
if (trailing_delimiter) {
raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
}
} }
/* verify that idx < end_idx, str[idx] should be '}' */ /* verify that idx < end_idx, str[idx] should be '}' */
if (idx > end_idx || str[idx] != '}') { if (idx > end_idx || str[idx] != '}') {
if (did_parse) { if (did_parse) {
raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
} else { } else {
raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
} }
goto bail; goto bail;
} }
@ -1605,10 +1595,10 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
/* only loop if the object is non-empty */ /* only loop if the object is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
int trailing_delimiter = 0; int trailing_delimiter = 0;
while (idx <= end_idx) { while (idx <= end_idx) {
PyObject *memokey; PyObject *memokey;
trailing_delimiter = 0; trailing_delimiter = 0;
/* read key */ /* read key */
if (PyUnicode_READ(kind, str, idx) != '"') { if (PyUnicode_READ(kind, str, idx) != '"') {
@ -1670,7 +1660,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
/* bail if the object is closed or we didn't get the , /* bail if the object is closed or we didn't get the ,
delimiter */ delimiter */
did_parse = 1; did_parse = 1;
if (idx > end_idx) break; if (idx > end_idx) break;
if (PyUnicode_READ(kind, str, idx) == '}') { if (PyUnicode_READ(kind, str, idx) == '}') {
break; break;
@ -1683,21 +1673,21 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
/* skip whitespace after , delimiter */ /* skip whitespace after , delimiter */
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
trailing_delimiter = 1; trailing_delimiter = 1;
}
if (trailing_delimiter) {
raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
} }
if (trailing_delimiter) {
raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
goto bail;
}
} }
/* verify that idx < end_idx, str[idx] should be '}' */ /* verify that idx < end_idx, str[idx] should be '}' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
if (did_parse) { if (did_parse) {
raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
} else { } else {
raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
} }
goto bail; goto bail;
} }
@ -1754,9 +1744,9 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t
/* only loop if the array is non-empty */ /* only loop if the array is non-empty */
if (idx <= end_idx && str[idx] != ']') { if (idx <= end_idx && str[idx] != ']') {
int trailing_delimiter = 0; int trailing_delimiter = 0;
while (idx <= end_idx) { while (idx <= end_idx) {
trailing_delimiter = 0; trailing_delimiter = 0;
/* read any JSON term and de-tuplefy the (rval, idx) */ /* read any JSON term and de-tuplefy the (rval, idx) */
val = scan_once_str(s, pystr, idx, &next_idx); val = scan_once_str(s, pystr, idx, &next_idx);
if (val == NULL) { if (val == NULL) {
@ -1785,21 +1775,21 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t
/* skip whitespace after , */ /* skip whitespace after , */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
trailing_delimiter = 1; trailing_delimiter = 1;
}
if (trailing_delimiter) {
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
goto bail;
} }
if (trailing_delimiter) {
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
goto bail;
}
} }
/* verify that idx < end_idx, str[idx] should be ']' */ /* verify that idx < end_idx, str[idx] should be ']' */
if (idx > end_idx || str[idx] != ']') { if (idx > end_idx || str[idx] != ']') {
if (PyList_GET_SIZE(rval)) { if (PyList_GET_SIZE(rval)) {
raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
} else { } else {
raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
} }
goto bail; goto bail;
} }
*next_idx_ptr = idx + 1; *next_idx_ptr = idx + 1;
@ -1835,9 +1825,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
/* only loop if the array is non-empty */ /* only loop if the array is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
int trailing_delimiter = 0; int trailing_delimiter = 0;
while (idx <= end_idx) { while (idx <= end_idx) {
trailing_delimiter = 0; trailing_delimiter = 0;
/* read any JSON term */ /* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx); val = scan_once_unicode(s, pystr, idx, &next_idx);
if (val == NULL) { if (val == NULL) {
@ -1866,21 +1856,21 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
/* skip whitespace after , */ /* skip whitespace after , */
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
trailing_delimiter = 1; trailing_delimiter = 1;
}
if (trailing_delimiter) {
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
goto bail;
} }
if (trailing_delimiter) {
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
goto bail;
}
} }
/* verify that idx < end_idx, str[idx] should be ']' */ /* verify that idx < end_idx, str[idx] should be ']' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
if (PyList_GET_SIZE(rval)) { if (PyList_GET_SIZE(rval)) {
raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
} else { } else {
raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
} }
goto bail; goto bail;
} }
*next_idx_ptr = idx + 1; *next_idx_ptr = idx + 1;
@ -2150,8 +2140,8 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
Py_ssize_t length = PyString_GET_SIZE(pystr); Py_ssize_t length = PyString_GET_SIZE(pystr);
PyObject *rval = NULL; PyObject *rval = NULL;
int fallthrough = 0; int fallthrough = 0;
if (idx >= length) { if (idx < 0 || idx >= length) {
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL; return NULL;
} }
switch (str[idx]) { switch (str[idx]) {
@ -2258,8 +2248,8 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
Py_ssize_t length = PyUnicode_GetLength(pystr); Py_ssize_t length = PyUnicode_GetLength(pystr);
PyObject *rval = NULL; PyObject *rval = NULL;
int fallthrough = 0; int fallthrough = 0;
if (idx >= length) { if (idx < 0 || idx >= length) {
raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
return NULL; return NULL;
} }
switch (PyUnicode_READ(kind, str, idx)) { switch (PyUnicode_READ(kind, str, idx)) {
@ -2568,6 +2558,8 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
s->item_sort_key = NULL; s->item_sort_key = NULL;
s->item_sort_kw = NULL; s->item_sort_kw = NULL;
s->Decimal = NULL; s->Decimal = NULL;
s->max_long_size = NULL;
s->min_long_size = NULL;
} }
return (PyObject *)s; return (PyObject *)s;
} }
@ -2576,13 +2568,33 @@ static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds) encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{ {
/* initialize Encoder object */ /* initialize Encoder object */
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "for_json", "ignore_nan", "Decimal", NULL}; static char *kwlist[] = {
"markers",
"default",
"encoder",
"indent",
"key_separator",
"item_separator",
"sort_keys",
"skipkeys",
"allow_nan",
"key_memo",
"use_decimal",
"namedtuple_as_object",
"tuple_as_array",
"int_as_string_bitcount",
"item_sort_key",
"encoding",
"for_json",
"ignore_nan",
"Decimal",
NULL};
PyEncoderObject *s; PyEncoderObject *s;
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array;
PyObject *bigint_as_string, *item_sort_key, *encoding, *for_json; PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json;
PyObject *ignore_nan, *Decimal; PyObject *ignore_nan, *Decimal;
assert(PyEncoder_Check(self)); assert(PyEncoder_Check(self));
@ -2591,21 +2603,30 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist,
&markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
&sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
&namedtuple_as_object, &tuple_as_array, &bigint_as_string, &namedtuple_as_object, &tuple_as_array,
&item_sort_key, &encoding, &for_json, &ignore_nan, &Decimal)) &int_as_string_bitcount, &item_sort_key, &encoding, &for_json,
&ignore_nan, &Decimal))
return -1; return -1;
Py_INCREF(markers);
s->markers = markers; s->markers = markers;
Py_INCREF(defaultfn);
s->defaultfn = defaultfn; s->defaultfn = defaultfn;
Py_INCREF(encoder);
s->encoder = encoder; s->encoder = encoder;
s->encoding = JSON_ParseEncoding(encoding); s->encoding = JSON_ParseEncoding(encoding);
if (s->encoding == NULL) if (s->encoding == NULL)
return -1; return -1;
Py_INCREF(indent);
s->indent = indent; s->indent = indent;
Py_INCREF(key_separator);
s->key_separator = key_separator; s->key_separator = key_separator;
Py_INCREF(item_separator);
s->item_separator = item_separator; s->item_separator = item_separator;
Py_INCREF(skipkeys);
s->skipkeys_bool = skipkeys; s->skipkeys_bool = skipkeys;
s->skipkeys = PyObject_IsTrue(skipkeys); s->skipkeys = PyObject_IsTrue(skipkeys);
Py_INCREF(key_memo);
s->key_memo = key_memo; s->key_memo = key_memo;
s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
s->allow_or_ignore_nan = ( s->allow_or_ignore_nan = (
@ -2614,10 +2635,38 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
s->use_decimal = PyObject_IsTrue(use_decimal); s->use_decimal = PyObject_IsTrue(use_decimal);
s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
s->tuple_as_array = PyObject_IsTrue(tuple_as_array); s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
s->bigint_as_string = PyObject_IsTrue(bigint_as_string); if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) {
static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8;
int int_as_string_bitcount_val = PyLong_AsLong(int_as_string_bitcount);
if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < long_long_bitsize) {
s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << int_as_string_bitcount_val);
s->min_long_size = PyLong_FromLongLong(-1LL << int_as_string_bitcount_val);
if (s->min_long_size == NULL || s->max_long_size == NULL) {
return -1;
}
}
else {
PyErr_Format(PyExc_TypeError,
"int_as_string_bitcount (%d) must be greater than 0 and less than the number of bits of a `long long` type (%u bits)",
int_as_string_bitcount_val, long_long_bitsize);
return -1;
}
}
else if (int_as_string_bitcount == Py_None) {
Py_INCREF(Py_None);
s->max_long_size = Py_None;
Py_INCREF(Py_None);
s->min_long_size = Py_None;
}
else {
PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer");
return -1;
}
if (item_sort_key != Py_None) { if (item_sort_key != Py_None) {
if (!PyCallable_Check(item_sort_key)) if (!PyCallable_Check(item_sort_key)) {
PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable");
return -1;
}
} }
else if (PyObject_IsTrue(sort_keys)) { else if (PyObject_IsTrue(sort_keys)) {
static PyObject *itemgetter0 = NULL; static PyObject *itemgetter0 = NULL;
@ -2643,22 +2692,14 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key))
return -1; return -1;
} }
Py_INCREF(sort_keys);
s->sort_keys = sort_keys; s->sort_keys = sort_keys;
Py_INCREF(item_sort_key);
s->item_sort_key = item_sort_key; s->item_sort_key = item_sort_key;
Py_INCREF(Decimal);
s->Decimal = Decimal; s->Decimal = Decimal;
s->for_json = PyObject_IsTrue(for_json); s->for_json = PyObject_IsTrue(for_json);
Py_INCREF(s->markers);
Py_INCREF(s->defaultfn);
Py_INCREF(s->encoder);
Py_INCREF(s->indent);
Py_INCREF(s->key_separator);
Py_INCREF(s->item_separator);
Py_INCREF(s->key_memo);
Py_INCREF(s->skipkeys_bool);
Py_INCREF(s->sort_keys);
Py_INCREF(s->item_sort_key);
Py_INCREF(s->Decimal);
return 0; return 0;
} }
@ -2801,11 +2842,9 @@ encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ss
else if (PyInt_Check(obj) || PyLong_Check(obj)) { else if (PyInt_Check(obj) || PyLong_Check(obj)) {
PyObject *encoded = PyObject_Str(obj); PyObject *encoded = PyObject_Str(obj);
if (encoded != NULL) { if (encoded != NULL) {
if (s->bigint_as_string) { encoded = maybe_quote_bigint(s, encoded, obj);
encoded = maybe_quote_bigint(encoded, obj); if (encoded == NULL)
if (encoded == NULL) break;
break;
}
rv = _steal_accumulate(rval, encoded); rv = _steal_accumulate(rval, encoded);
} }
} }
@ -3031,6 +3070,7 @@ encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_s
bail: bail:
Py_XDECREF(encoded); Py_XDECREF(encoded);
Py_XDECREF(items); Py_XDECREF(items);
Py_XDECREF(item);
Py_XDECREF(iter); Py_XDECREF(iter);
Py_XDECREF(kstr); Py_XDECREF(kstr);
Py_XDECREF(ident); Py_XDECREF(ident);
@ -3157,6 +3197,8 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg)
Py_VISIT(s->sort_keys); Py_VISIT(s->sort_keys);
Py_VISIT(s->item_sort_kw); Py_VISIT(s->item_sort_kw);
Py_VISIT(s->item_sort_key); Py_VISIT(s->item_sort_key);
Py_VISIT(s->max_long_size);
Py_VISIT(s->min_long_size);
Py_VISIT(s->Decimal); Py_VISIT(s->Decimal);
return 0; return 0;
} }
@ -3180,6 +3222,8 @@ encoder_clear(PyObject *self)
Py_CLEAR(s->sort_keys); Py_CLEAR(s->sort_keys);
Py_CLEAR(s->item_sort_kw); Py_CLEAR(s->item_sort_kw);
Py_CLEAR(s->item_sort_key); Py_CLEAR(s->item_sort_key);
Py_CLEAR(s->max_long_size);
Py_CLEAR(s->min_long_size);
Py_CLEAR(s->Decimal); Py_CLEAR(s->Decimal);
return 0; return 0;
} }

View file

@ -20,7 +20,10 @@ if sys.version_info[0] < 3:
else: else:
PY3 = True PY3 = True
from imp import reload as reload_module if sys.version_info[:2] >= (3, 4):
from importlib import reload as reload_module
else:
from imp import reload as reload_module
import codecs import codecs
def b(s): def b(s):
return codecs.latin_1_encode(s)[0] return codecs.latin_1_encode(s)[0]

View file

@ -281,7 +281,7 @@ class JSONDecoder(object):
+---------------+-------------------+ +---------------+-------------------+
| array | list | | array | list |
+---------------+-------------------+ +---------------+-------------------+
| string | unicode | | string | str, unicode |
+---------------+-------------------+ +---------------+-------------------+
| number (int) | int, long | | number (int) | int, long |
+---------------+-------------------+ +---------------+-------------------+
@ -384,6 +384,17 @@ class JSONDecoder(object):
have extraneous data at the end. have extraneous data at the end.
""" """
if idx < 0:
# Ensure that raw_decode bails on negative indexes, the regex
# would otherwise mask this behavior. #98
raise JSONDecodeError('Expecting value', s, idx)
if _PY3 and not isinstance(s, text_type): if _PY3 and not isinstance(s, text_type):
raise TypeError("Input string must be text, not bytes") raise TypeError("Input string must be text, not bytes")
# strip UTF-8 bom
if len(s) > idx:
ord0 = ord(s[idx])
if ord0 == 0xfeff:
idx += 1
elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
idx += 3
return self.scan_once(s, idx=_w(s, idx).end()) return self.scan_once(s, idx=_w(s, idx).end())

View file

@ -18,7 +18,7 @@ from simplejson.decoder import PosInf
#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') #ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# This is required because u() will mangle the string and ur'' isn't valid # This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax # python3 syntax
ESCAPE = re.compile(u('[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')) ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]') HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = { ESCAPE_DCT = {
@ -116,12 +116,14 @@ class JSONEncoder(object):
""" """
item_separator = ', ' item_separator = ', '
key_separator = ': ' key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True, def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False, check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None, indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True, use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False, tuple_as_array=True, bigint_as_string=False,
item_sort_key=None, for_json=False, ignore_nan=False): item_sort_key=None, for_json=False, ignore_nan=False,
int_as_string_bitcount=None):
"""Constructor for JSONEncoder, with sensible defaults. """Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt If skipkeys is false, then it is a TypeError to attempt
@ -180,6 +182,10 @@ class JSONEncoder(object):
or lower than -2**53 will be encoded as strings. This is to avoid the or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise. rounding that happens in Javascript otherwise.
If int_as_string_bitcount is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, item_sort_key is a callable used to sort the items in If specified, item_sort_key is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. in alphabetical order by key.
@ -207,6 +213,7 @@ class JSONEncoder(object):
self.item_sort_key = item_sort_key self.item_sort_key = item_sort_key
self.for_json = for_json self.for_json = for_json
self.ignore_nan = ignore_nan self.ignore_nan = ignore_nan
self.int_as_string_bitcount = int_as_string_bitcount
if indent is not None and not isinstance(indent, string_types): if indent is not None and not isinstance(indent, string_types):
indent = indent * ' ' indent = indent * ' '
self.indent = indent self.indent = indent
@ -265,7 +272,7 @@ class JSONEncoder(object):
if self.ensure_ascii: if self.ensure_ascii:
return ''.join(chunks) return ''.join(chunks)
else: else:
return u('').join(chunks) return u''.join(chunks)
def iterencode(self, o, _one_shot=False): def iterencode(self, o, _one_shot=False):
"""Encode the given object and yield each string """Encode the given object and yield each string
@ -315,8 +322,9 @@ class JSONEncoder(object):
return text return text
key_memo = {} key_memo = {}
int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None if (_one_shot and c_make_encoder is not None
and self.indent is None): and self.indent is None):
_iterencode = c_make_encoder( _iterencode = c_make_encoder(
@ -324,17 +332,17 @@ class JSONEncoder(object):
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan, key_memo, self.use_decimal, self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array, self.namedtuple_as_object, self.tuple_as_array,
self.bigint_as_string, self.item_sort_key, int_as_string_bitcount,
self.encoding, self.for_json, self.ignore_nan, self.item_sort_key, self.encoding, self.for_json,
Decimal) self.ignore_nan, Decimal)
else: else:
_iterencode = _make_iterencode( _iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr, markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal, self.skipkeys, _one_shot, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array, self.namedtuple_as_object, self.tuple_as_array,
self.bigint_as_string, self.item_sort_key, int_as_string_bitcount,
self.encoding, self.for_json, self.item_sort_key, self.encoding, self.for_json,
Decimal=Decimal) Decimal=Decimal)
try: try:
return _iterencode(o, 0) return _iterencode(o, 0)
@ -358,7 +366,7 @@ class JSONEncoderForHTML(JSONEncoder):
if self.ensure_ascii: if self.ensure_ascii:
return ''.join(chunks) return ''.join(chunks)
else: else:
return u('').join(chunks) return u''.join(chunks)
def iterencode(self, o, _one_shot=False): def iterencode(self, o, _one_shot=False):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
@ -372,7 +380,8 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_use_decimal, _namedtuple_as_object, _tuple_as_array, _use_decimal, _namedtuple_as_object, _tuple_as_array,
_bigint_as_string, _item_sort_key, _encoding, _for_json, _int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,
## HACK: hand-optimized bytecode; turn globals into locals ## HACK: hand-optimized bytecode; turn globals into locals
_PY3=PY3, _PY3=PY3,
ValueError=ValueError, ValueError=ValueError,
@ -392,6 +401,26 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif _sort_keys and not _item_sort_key: elif _sort_keys and not _item_sort_key:
_item_sort_key = itemgetter(0) _item_sort_key = itemgetter(0)
if (_int_as_string_bitcount is not None and
(_int_as_string_bitcount <= 0 or
not isinstance(_int_as_string_bitcount, integer_types))):
raise TypeError("int_as_string_bitcount must be a positive integer")
def _encode_int(value):
skip_quoting = (
_int_as_string_bitcount is None
or
_int_as_string_bitcount < 1
)
if (
skip_quoting or
(-1 << _int_as_string_bitcount)
< value <
(1 << _int_as_string_bitcount)
):
return str(value)
return '"' + str(value) + '"'
def _iterencode_list(lst, _current_indent_level): def _iterencode_list(lst, _current_indent_level):
if not lst: if not lst:
yield '[]' yield '[]'
@ -426,10 +455,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif value is False: elif value is False:
yield buf + 'false' yield buf + 'false'
elif isinstance(value, integer_types): elif isinstance(value, integer_types):
yield ((buf + str(value)) yield buf + _encode_int(value)
if (not _bigint_as_string or
(-1 << 53) < value < (1 << 53))
else (buf + '"' + str(value) + '"'))
elif isinstance(value, float): elif isinstance(value, float):
yield buf + _floatstr(value) yield buf + _floatstr(value)
elif _use_decimal and isinstance(value, Decimal): elif _use_decimal and isinstance(value, Decimal):
@ -540,10 +566,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif value is False: elif value is False:
yield 'false' yield 'false'
elif isinstance(value, integer_types): elif isinstance(value, integer_types):
yield (str(value) yield _encode_int(value)
if (not _bigint_as_string or
(-1 << 53) < value < (1 << 53))
else ('"' + str(value) + '"'))
elif isinstance(value, float): elif isinstance(value, float):
yield _floatstr(value) yield _floatstr(value)
elif _use_decimal and isinstance(value, Decimal): elif _use_decimal and isinstance(value, Decimal):
@ -585,10 +608,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif o is False: elif o is False:
yield 'false' yield 'false'
elif isinstance(o, integer_types): elif isinstance(o, integer_types):
yield (str(o) yield _encode_int(o)
if (not _bigint_as_string or
(-1 << 53) < o < (1 << 53))
else ('"' + str(o) + '"'))
elif isinstance(o, float): elif isinstance(o, float):
yield _floatstr(o) yield _floatstr(o)
else: else:

View file

@ -41,6 +41,9 @@ class JSONDecodeError(ValueError):
else: else:
self.endlineno, self.endcolno = None, None self.endlineno, self.endcolno = None, None
def __reduce__(self):
return self.__class__, (self.msg, self.doc, self.pos, self.end)
def linecol(doc, pos): def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1 lineno = doc.count('\n', 0, pos) + 1
@ -115,6 +118,11 @@ def py_make_scanner(context):
raise JSONDecodeError(errmsg, string, idx) raise JSONDecodeError(errmsg, string, idx)
def scan_once(string, idx): def scan_once(string, idx):
if idx < 0:
# Ensure the same behavior as the C speedup, otherwise
# this would work for *some* negative string indices due
# to the behavior of __getitem__ for strings. #98
raise JSONDecodeError('Expecting value', string, idx)
try: try:
return _scan_once(string, idx) return _scan_once(string, idx)
finally: finally:

View file

@ -3,19 +3,16 @@ import unittest
import doctest import doctest
import sys import sys
class OptionalExtensionTestSuite(unittest.TestSuite):
class NoExtensionTestSuite(unittest.TestSuite):
def run(self, result): def run(self, result):
import simplejson import simplejson
run = unittest.TestSuite.run simplejson._toggle_speedups(False)
run(self, result) result = unittest.TestSuite.run(self, result)
if simplejson._import_c_make_encoder() is None: simplejson._toggle_speedups(True)
TestMissingSpeedups().run(result)
else:
simplejson._toggle_speedups(False)
run(self, result)
simplejson._toggle_speedups(True)
return result return result
class TestMissingSpeedups(unittest.TestCase): class TestMissingSpeedups(unittest.TestCase):
def runTest(self): def runTest(self):
if hasattr(sys, 'pypy_translation_info'): if hasattr(sys, 'pypy_translation_info'):
@ -23,6 +20,7 @@ class TestMissingSpeedups(unittest.TestCase):
elif hasattr(self, 'skipTest'): elif hasattr(self, 'skipTest'):
self.skipTest('_speedups.so is missing!') self.skipTest('_speedups.so is missing!')
def additional_tests(suite=None): def additional_tests(suite=None):
import simplejson import simplejson
import simplejson.encoder import simplejson.encoder
@ -36,34 +34,45 @@ def additional_tests(suite=None):
def all_tests_suite(): def all_tests_suite():
suite = unittest.TestLoader().loadTestsFromNames([ def get_suite():
'simplejson.tests.test_bigint_as_string', return additional_tests(
'simplejson.tests.test_check_circular', unittest.TestLoader().loadTestsFromNames([
'simplejson.tests.test_decode', 'simplejson.tests.test_bitsize_int_as_string',
'simplejson.tests.test_default', 'simplejson.tests.test_bigint_as_string',
'simplejson.tests.test_dump', 'simplejson.tests.test_check_circular',
'simplejson.tests.test_encode_basestring_ascii', 'simplejson.tests.test_decode',
'simplejson.tests.test_encode_for_html', 'simplejson.tests.test_default',
'simplejson.tests.test_errors', 'simplejson.tests.test_dump',
'simplejson.tests.test_fail', 'simplejson.tests.test_encode_basestring_ascii',
'simplejson.tests.test_float', 'simplejson.tests.test_encode_for_html',
'simplejson.tests.test_indent', 'simplejson.tests.test_errors',
'simplejson.tests.test_pass1', 'simplejson.tests.test_fail',
'simplejson.tests.test_pass2', 'simplejson.tests.test_float',
'simplejson.tests.test_pass3', 'simplejson.tests.test_indent',
'simplejson.tests.test_recursion', 'simplejson.tests.test_pass1',
'simplejson.tests.test_scanstring', 'simplejson.tests.test_pass2',
'simplejson.tests.test_separators', 'simplejson.tests.test_pass3',
'simplejson.tests.test_speedups', 'simplejson.tests.test_recursion',
'simplejson.tests.test_unicode', 'simplejson.tests.test_scanstring',
'simplejson.tests.test_decimal', 'simplejson.tests.test_separators',
'simplejson.tests.test_tuple', 'simplejson.tests.test_speedups',
'simplejson.tests.test_namedtuple', 'simplejson.tests.test_unicode',
'simplejson.tests.test_tool', 'simplejson.tests.test_decimal',
'simplejson.tests.test_for_json', 'simplejson.tests.test_tuple',
]) 'simplejson.tests.test_namedtuple',
suite = additional_tests(suite) 'simplejson.tests.test_tool',
return OptionalExtensionTestSuite([suite]) 'simplejson.tests.test_for_json',
]))
suite = get_suite()
import simplejson
if simplejson._import_c_make_encoder() is None:
suite.addTest(TestMissingSpeedups())
else:
suite = unittest.TestSuite([
suite,
NoExtensionTestSuite([get_suite()]),
])
return suite
def main(): def main():

View file

@ -1,7 +1,7 @@
from unittest import TestCase from unittest import TestCase
import simplejson as json import simplejson as json
from simplejson.compat import long_type
class TestBigintAsString(TestCase): class TestBigintAsString(TestCase):
# Python 2.5, at least the one that ships on Mac OS X, calculates # Python 2.5, at least the one that ships on Mac OS X, calculates
@ -15,44 +15,53 @@ class TestBigintAsString(TestCase):
((-1 << 53) - 1, '-9007199254740993'), ((-1 << 53) - 1, '-9007199254740993'),
((-1 << 53) + 1, -9007199254740991)] ((-1 << 53) + 1, -9007199254740991)]
options = (
{"bigint_as_string": True},
{"int_as_string_bitcount": 53}
)
def test_ints(self): def test_ints(self):
for val, expect in self.values: for opts in self.options:
self.assertEqual( for val, expect in self.values:
val, self.assertEqual(
json.loads(json.dumps(val))) val,
self.assertEqual( json.loads(json.dumps(val)))
expect, self.assertEqual(
json.loads(json.dumps(val, bigint_as_string=True))) expect,
json.loads(json.dumps(val, **opts)))
def test_lists(self): def test_lists(self):
for val, expect in self.values: for opts in self.options:
val = [val, val] for val, expect in self.values:
expect = [expect, expect] val = [val, val]
self.assertEqual( expect = [expect, expect]
val, self.assertEqual(
json.loads(json.dumps(val))) val,
self.assertEqual( json.loads(json.dumps(val)))
expect, self.assertEqual(
json.loads(json.dumps(val, bigint_as_string=True))) expect,
json.loads(json.dumps(val, **opts)))
def test_dicts(self): def test_dicts(self):
for val, expect in self.values: for opts in self.options:
val = {'k': val} for val, expect in self.values:
expect = {'k': expect} val = {'k': val}
self.assertEqual( expect = {'k': expect}
val, self.assertEqual(
json.loads(json.dumps(val))) val,
self.assertEqual( json.loads(json.dumps(val)))
expect, self.assertEqual(
json.loads(json.dumps(val, bigint_as_string=True))) expect,
json.loads(json.dumps(val, **opts)))
def test_dict_keys(self): def test_dict_keys(self):
for val, _ in self.values: for opts in self.options:
expect = {str(val): 'value'} for val, _ in self.values:
val = {val: 'value'} expect = {str(val): 'value'}
self.assertEqual( val = {val: 'value'}
expect, self.assertEqual(
json.loads(json.dumps(val))) expect,
self.assertEqual( json.loads(json.dumps(val)))
expect, self.assertEqual(
json.loads(json.dumps(val, bigint_as_string=True))) expect,
json.loads(json.dumps(val, **opts)))

View file

@ -0,0 +1,73 @@
from unittest import TestCase
import simplejson as json
class TestBitSizeIntAsString(TestCase):
    """Exercise the int_as_string_bitcount encoder option.

    With ``int_as_string_bitcount=31``, ints with magnitude >= 2**31
    must be serialized as JSON strings; smaller ints stay plain numbers.
    """
    # Python 2.5, at least the one that ships on Mac OS X, calculates
    # 2 ** 31 as 0! It manages to calculate 1 << 31 correctly.
    values = [
        (200, 200),
        ((1 << 31) - 1, (1 << 31) - 1),
        ((1 << 31), str(1 << 31)),
        ((1 << 31) + 1, str((1 << 31) + 1)),
        (-100, -100),
        ((-1 << 31), str(-1 << 31)),
        ((-1 << 31) - 1, str((-1 << 31) - 1)),
        ((-1 << 31) + 1, (-1 << 31) + 1),
    ]

    def _check_roundtrip(self, val, expect):
        # Default dump/load must round-trip the value unchanged, while
        # a 31-bit threshold must produce the quoted/unquoted *expect*.
        self.assertEqual(
            val,
            json.loads(json.dumps(val)))
        self.assertEqual(
            expect,
            json.loads(json.dumps(val, int_as_string_bitcount=31)))

    def test_invalid_counts(self):
        # Non-integer and non-positive bit counts are rejected outright.
        for n in ['foo', -1, 0, 1.0]:
            self.assertRaises(
                TypeError,
                json.dumps, 0, int_as_string_bitcount=n)

    def test_ints_outside_range_fails(self):
        # 2**15 is below a 16-bit threshold, so it must NOT be quoted.
        self.assertNotEqual(
            str(1 << 15),
            json.loads(json.dumps(1 << 15, int_as_string_bitcount=16)),
        )

    def test_ints(self):
        for val, expect in self.values:
            self._check_roundtrip(val, expect)

    def test_lists(self):
        for val, expect in self.values:
            self._check_roundtrip([val, val], [expect, expect])

    def test_dicts(self):
        for val, expect in self.values:
            self._check_roundtrip({'k': val}, {'k': expect})

    def test_dict_keys(self):
        # Dict keys are always emitted as strings, so the expected key
        # is str(val) with and without the bitcount option.
        for val, _ in self.values:
            expect = {str(val): 'value'}
            val = {val: 'value'}
            self.assertEqual(
                expect,
                json.loads(json.dumps(val)))
            self.assertEqual(
                expect,
                json.loads(json.dumps(val, int_as_string_bitcount=31)))

View file

@ -86,3 +86,14 @@ class TestDecode(TestCase):
self.assertEqual( self.assertEqual(
({'a': {}}, 11), ({'a': {}}, 11),
cls().raw_decode(" \n{\"a\": {}}")) cls().raw_decode(" \n{\"a\": {}}"))
def test_bounds_checking(self):
# https://github.com/simplejson/simplejson/issues/98
j = json.decoder.JSONDecoder()
for i in [4, 5, 6, -1, -2, -3, -4, -5, -6]:
self.assertRaises(ValueError, j.scan_once, '1234', i)
self.assertRaises(ValueError, j.raw_decode, '1234', i)
x, y = sorted(['128931233', '472389423'], key=id)
diff = id(x) - id(y)
self.assertRaises(ValueError, j.scan_once, y, diff)
self.assertRaises(ValueError, j.raw_decode, y, i)

View file

@ -119,3 +119,12 @@ class TestDump(TestCase):
# the C API uses an accumulator that collects after 100,000 appends # the C API uses an accumulator that collects after 100,000 appends
lst = [0] * 100000 lst = [0] * 100000
self.assertEqual(json.loads(json.dumps(lst)), lst) self.assertEqual(json.loads(json.dumps(lst)), lst)
def test_sort_keys(self):
# https://github.com/simplejson/simplejson/issues/106
for num_keys in range(2, 32):
p = dict((str(x), x) for x in range(num_keys))
sio = StringIO()
json.dump(p, sio, sort_keys=True)
self.assertEqual(sio.getvalue(), json.dumps(p, sort_keys=True))
self.assertEqual(json.loads(sio.getvalue()), p)

View file

@ -1,4 +1,4 @@
import sys import sys, pickle
from unittest import TestCase from unittest import TestCase
import simplejson as json import simplejson as json
@ -33,3 +33,19 @@ class TestErrors(TestCase):
self.fail('Expected JSONDecodeError') self.fail('Expected JSONDecodeError')
self.assertEqual(err.lineno, 1) self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, 10) self.assertEqual(err.colno, 10)
def test_error_is_pickable(self):
err = None
try:
json.loads('{}\na\nb')
except json.JSONDecodeError:
err = sys.exc_info()[1]
else:
self.fail('Expected JSONDecodeError')
s = pickle.dumps(err)
e = pickle.loads(s)
self.assertEqual(err.msg, e.msg)
self.assertEqual(err.doc, e.doc)
self.assertEqual(err.pos, e.pos)
self.assertEqual(err.end, e.end)

View file

@ -1,20 +1,39 @@
import sys
import unittest
from unittest import TestCase from unittest import TestCase
from simplejson import encoder, scanner from simplejson import encoder, scanner
def has_speedups(): def has_speedups():
return encoder.c_make_encoder is not None return encoder.c_make_encoder is not None
class TestDecode(TestCase):
def test_make_scanner(self): def skip_if_speedups_missing(func):
def wrapper(*args, **kwargs):
if not has_speedups(): if not has_speedups():
return if hasattr(unittest, 'SkipTest'):
raise unittest.SkipTest("C Extension not available")
else:
sys.stdout.write("C Extension not available")
return
return func(*args, **kwargs)
return wrapper
class TestDecode(TestCase):
@skip_if_speedups_missing
def test_make_scanner(self):
self.assertRaises(AttributeError, scanner.c_make_scanner, 1) self.assertRaises(AttributeError, scanner.c_make_scanner, 1)
@skip_if_speedups_missing
def test_make_encoder(self): def test_make_encoder(self):
if not has_speedups(): self.assertRaises(
return TypeError,
self.assertRaises(TypeError, encoder.c_make_encoder, encoder.c_make_encoder,
None, None,
"\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75", ("\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7"
None) "\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75"),
None
)

View file

@ -1,8 +1,9 @@
import sys import sys
import codecs
from unittest import TestCase from unittest import TestCase
import simplejson as json import simplejson as json
from simplejson.compat import unichr, text_type, b, u from simplejson.compat import unichr, text_type, b, u, BytesIO
class TestUnicode(TestCase): class TestUnicode(TestCase):
def test_encoding1(self): def test_encoding1(self):
@ -143,3 +144,10 @@ class TestUnicode(TestCase):
self.assertEqual( self.assertEqual(
json.dumps(c, ensure_ascii=False), json.dumps(c, ensure_ascii=False),
'"' + c + '"') '"' + c + '"')
def test_strip_bom(self):
content = u"\u3053\u3093\u306b\u3061\u308f"
json_doc = codecs.BOM_UTF8 + b(json.dumps(content))
self.assertEqual(json.load(BytesIO(json_doc)), content)
for doc in json_doc, json_doc.decode('utf8'):
self.assertEqual(json.loads(doc), content)