include python3.2 compatible versions of simplejson, pytz, and tzlocal

Alan Hamlett 2014-06-06 13:55:20 -05:00
parent 4476d0401d
commit 0ae5f37a40
47 changed files with 7484 additions and 2 deletions

View File

@@ -39,13 +39,20 @@ except ImportError:
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages'))
from .queue import Queue
from .log import setup_logging
from .project import find_project
from .stats import get_file_stats
from .packages import argparse
from .packages import simplejson as json
from .packages import tzlocal
try:
from .packages import simplejson as json
except (ImportError, SyntaxError):
from .packages import simplejson3 as json
try:
from .packages import tzlocal
except (ImportError, SyntaxError):
from .packages import tzlocal3
log = logging.getLogger(__name__)
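The two-step import above works because a failed import of the preferred build simply falls through to the alternative; note that under Python 3 a Python 2-only build fails at compile time with SyntaxError rather than ImportError, which is why the except clause catches both. A standalone sketch of the same pattern, with stand-in module names ('ujson' here is purely illustrative and may legitimately be absent):

# Fallback-import sketch: try the preferred module, fall back to one
# that is always available. Stand-in names, not the vendored packages.
try:
    import ujson as json  # optional third-party build
except ImportError:
    import json  # stdlib fallback

print(json.dumps({'ok': True}))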

File diff suppressed because it is too large

View File

@@ -0,0 +1,48 @@
'''
Custom exceptions raised by pytz.
'''
__all__ = [
'UnknownTimeZoneError', 'InvalidTimeError', 'AmbiguousTimeError',
'NonExistentTimeError',
]
class UnknownTimeZoneError(KeyError):
'''Exception raised when pytz is passed an unknown timezone.
>>> isinstance(UnknownTimeZoneError(), LookupError)
True
This class is actually a subclass of KeyError to provide backwards
compatibility with code relying on the undocumented behavior of earlier
pytz releases.
>>> isinstance(UnknownTimeZoneError(), KeyError)
True
'''
pass
class InvalidTimeError(Exception):
'''Base class for invalid time exceptions.'''
class AmbiguousTimeError(InvalidTimeError):
'''Exception raised when attempting to create an ambiguous wallclock time.
At the end of a DST transition period, a particular wallclock time will
occur twice (once before the clocks are set back, once after). Both
possibilities may be correct, unless further information is supplied.
See DstTzInfo.normalize() for more info
'''
class NonExistentTimeError(InvalidTimeError):
'''Exception raised when attempting to create a wallclock time that
cannot exist.
At the start of a DST transition period, the wallclock time jumps forward.
The instants jumped over never occur.
'''
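A short usage sketch, assuming the vendored pytz above is importable: calling localize() with is_dst=None is the documented way to surface both exceptions.

from datetime import datetime
import pytz
from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError

eastern = pytz.timezone('US/Eastern')

try:
    # 01:30 occurred twice on 2002-10-27, when clocks were wound back.
    eastern.localize(datetime(2002, 10, 27, 1, 30), is_dst=None)
except AmbiguousTimeError:
    print('ambiguous wallclock time')

try:
    # 02:30 never existed on 2002-04-07, when clocks jumped 02:00 -> 03:00.
    eastern.localize(datetime(2002, 4, 7, 2, 30), is_dst=None)
except NonExistentTimeError:
    print('non-existent wallclock time')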

View File

@@ -0,0 +1,148 @@
from threading import RLock
try:
from UserDict import DictMixin
except ImportError:
from collections import Mapping as DictMixin
_fill_lock = RLock()
class LazyDict(DictMixin):
"""Dictionary populated on first use."""
data = None
def __getitem__(self, key):
if self.data is None:
_fill_lock.acquire()
try:
if self.data is None:
self._fill()
finally:
_fill_lock.release()
return self.data[key.upper()]
def __contains__(self, key):
if self.data is None:
_fill_lock.acquire()
try:
if self.data is None:
self._fill()
finally:
_fill_lock.release()
return key in self.data
def __iter__(self):
if self.data is None:
_fill_lock.acquire()
try:
if self.data is None:
self._fill()
finally:
_fill_lock.release()
return iter(self.data)
def __len__(self):
if self.data is None:
_fill_lock.acquire()
try:
if self.data is None:
self._fill()
finally:
_fill_lock.release()
return len(self.data)
def keys(self):
if self.data is None:
_fill_lock.acquire()
try:
if self.data is None:
self._fill()
finally:
_fill_lock.release()
return list(self.data.keys())
class LazyList(list):
"""List populated on first use."""
def __new__(cls, fill_iter):
class LazyList(list):
_fill_iter = None
_props = (
'__str__', '__repr__', '__unicode__',
'__hash__', '__sizeof__', '__cmp__', '__nonzero__',
'__lt__', '__le__', '__eq__', '__ne__', '__gt__', '__ge__',
'append', 'count', 'index', 'extend', 'insert', 'pop', 'remove',
'reverse', 'sort', '__add__', '__radd__', '__iadd__', '__mul__',
'__rmul__', '__imul__', '__contains__', '__len__', '__nonzero__',
'__getitem__', '__setitem__', '__delitem__', '__iter__',
'__reversed__', '__getslice__', '__setslice__', '__delslice__')
def lazy(name):
def _lazy(self, *args, **kw):
if self._fill_iter is not None:
_fill_lock.acquire()
try:
if self._fill_iter is not None:
list.extend(self, self._fill_iter)
self._fill_iter = None
finally:
_fill_lock.release()
real = getattr(list, name)
setattr(self.__class__, name, real)
return real(self, *args, **kw)
return _lazy
for name in _props:
setattr(LazyList, name, lazy(name))
new_list = LazyList()
new_list._fill_iter = fill_iter
return new_list
class LazySet(set):
"""Set populated on first use."""
def __new__(cls, fill_iter):
class LazySet(set):
_fill_iter = None
_props = (
'__str__', '__repr__', '__unicode__',
'__hash__', '__sizeof__', '__cmp__', '__nonzero__',
'__lt__', '__le__', '__eq__', '__ne__', '__gt__', '__ge__',
'__contains__', '__len__', '__nonzero__',
'__getitem__', '__setitem__', '__delitem__', '__iter__',
'__sub__', '__and__', '__xor__', '__or__',
'__rsub__', '__rand__', '__rxor__', '__ror__',
'__isub__', '__iand__', '__ixor__', '__ior__',
'add', 'clear', 'copy', 'difference', 'difference_update',
'discard', 'intersection', 'intersection_update', 'isdisjoint',
'issubset', 'issuperset', 'pop', 'remove',
'symmetric_difference', 'symmetric_difference_update',
'union', 'update')
def lazy(name):
def _lazy(self, *args, **kw):
if self._fill_iter is not None:
_fill_lock.acquire()
try:
if self._fill_iter is not None:
for i in self._fill_iter:
set.add(self, i)
self._fill_iter = None
finally:
_fill_lock.release()
real = getattr(set, name)
setattr(self.__class__, name, real)
return real(self, *args, **kw)
return _lazy
for name in _props:
setattr(LazySet, name, lazy(name))
new_set = LazySet()
new_set._fill_iter = fill_iter
return new_set
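A minimal sketch of how LazyDict is meant to be consumed (the subclass and its data here are hypothetical): _fill() runs at most once, under _fill_lock, on first access.

class CountryNames(LazyDict):
    """Hypothetical lazy mapping; pretend _fill does an expensive parse."""
    def _fill(self):
        self.data = {'NZ': 'New Zealand', 'US': 'United States'}

names = CountryNames()
print(names['nz'])    # lookups upper-case the key, per __getitem__ above
print('NZ' in names)  # True; the dict filled itself on the first lookup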

View File

@@ -0,0 +1,127 @@
'''
Reference tzinfo implementations from the Python docs.
Used for testing against, as they are only correct for the years
1987 to 2006. Do not use these for real code.
'''
from datetime import tzinfo, timedelta, datetime
from pytz import utc, UTC, HOUR, ZERO
# A class building tzinfo objects for fixed-offset time zones.
# Note that FixedOffset(0, "UTC") is a different way to build a
# UTC tzinfo object.
class FixedOffset(tzinfo):
"""Fixed offset in minutes east from UTC."""
def __init__(self, offset, name):
self.__offset = timedelta(minutes = offset)
self.__name = name
def utcoffset(self, dt):
return self.__offset
def tzname(self, dt):
return self.__name
def dst(self, dt):
return ZERO
# A class capturing the platform's idea of local time.
import time as _time
STDOFFSET = timedelta(seconds = -_time.timezone)
if _time.daylight:
DSTOFFSET = timedelta(seconds = -_time.altzone)
else:
DSTOFFSET = STDOFFSET
DSTDIFF = DSTOFFSET - STDOFFSET
class LocalTimezone(tzinfo):
def utcoffset(self, dt):
if self._isdst(dt):
return DSTOFFSET
else:
return STDOFFSET
def dst(self, dt):
if self._isdst(dt):
return DSTDIFF
else:
return ZERO
def tzname(self, dt):
return _time.tzname[self._isdst(dt)]
def _isdst(self, dt):
tt = (dt.year, dt.month, dt.day,
dt.hour, dt.minute, dt.second,
dt.weekday(), 0, -1)
stamp = _time.mktime(tt)
tt = _time.localtime(stamp)
return tt.tm_isdst > 0
Local = LocalTimezone()
# A complete implementation of current DST rules for major US time zones.
def first_sunday_on_or_after(dt):
days_to_go = 6 - dt.weekday()
if days_to_go:
dt += timedelta(days_to_go)
return dt
# In the US, DST starts at 2am (standard time) on the first Sunday in April.
DSTSTART = datetime(1, 4, 1, 2)
# and ends at 2am (DST time; 1am standard time) on the last Sunday of Oct.
# which is the first Sunday on or after Oct 25.
DSTEND = datetime(1, 10, 25, 1)
class USTimeZone(tzinfo):
def __init__(self, hours, reprname, stdname, dstname):
self.stdoffset = timedelta(hours=hours)
self.reprname = reprname
self.stdname = stdname
self.dstname = dstname
def __repr__(self):
return self.reprname
def tzname(self, dt):
if self.dst(dt):
return self.dstname
else:
return self.stdname
def utcoffset(self, dt):
return self.stdoffset + self.dst(dt)
def dst(self, dt):
if dt is None or dt.tzinfo is None:
# An exception may be sensible here, in one or both cases.
# It depends on how you want to treat them. The default
# fromutc() implementation (called by the default astimezone()
# implementation) passes a datetime with dt.tzinfo is self.
return ZERO
assert dt.tzinfo is self
# Find first Sunday in April & the last in October.
start = first_sunday_on_or_after(DSTSTART.replace(year=dt.year))
end = first_sunday_on_or_after(DSTEND.replace(year=dt.year))
# Can't compare naive to aware objects, so strip the timezone from
# dt first.
if start <= dt.replace(tzinfo=None) < end:
return HOUR
else:
return ZERO
Eastern = USTimeZone(-5, "Eastern", "EST", "EDT")
Central = USTimeZone(-6, "Central", "CST", "CDT")
Mountain = USTimeZone(-7, "Mountain", "MST", "MDT")
Pacific = USTimeZone(-8, "Pacific", "PST", "PDT")
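A quick sanity check of the US rules above, valid only for 1987 to 2006 per the module docstring:

from datetime import datetime

summer = datetime(2002, 7, 1, 12, 0, tzinfo=Eastern)
winter = datetime(2002, 1, 1, 12, 0, tzinfo=Eastern)
print(summer.tzname(), summer.utcoffset())  # EDT, UTC-4 (-1 day, 20:00:00)
print(winter.tzname(), winter.utcoffset())  # EST, UTC-5 (-1 day, 19:00:00)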

View File

@@ -0,0 +1,36 @@
# -*- coding: ascii -*-
from doctest import DocTestSuite
import unittest, os, os.path, sys
import warnings
# We test the documentation this way instead of using DocFileSuite so
# we can run the tests under Python 2.3
def test_README():
pass
this_dir = os.path.dirname(__file__)
locs = [
os.path.join(this_dir, os.pardir, 'README.txt'),
os.path.join(this_dir, os.pardir, os.pardir, 'README.txt'),
]
for loc in locs:
if os.path.exists(loc):
test_README.__doc__ = open(loc).read()
break
if test_README.__doc__ is None:
raise RuntimeError('README.txt not found')
def test_suite():
"For the Z3 test runner"
return DocTestSuite()
if __name__ == '__main__':
sys.path.insert(0, os.path.abspath(os.path.join(
this_dir, os.pardir, os.pardir
)))
unittest.main(defaultTest='test_suite')

View File

@@ -0,0 +1,813 @@
# -*- coding: ascii -*-
import sys, os, os.path
import unittest, doctest
try:
import cPickle as pickle
except ImportError:
import pickle
from datetime import datetime, time, timedelta, tzinfo
import warnings
if __name__ == '__main__':
# Only munge path if invoked as a script. Test runners should have set up
# the paths already
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, os.pardir)))
import pytz
from pytz import reference
from pytz.tzfile import _byte_string
from pytz.tzinfo import DstTzInfo, StaticTzInfo
# I test for expected version to ensure the correct version of pytz is
# actually being tested.
EXPECTED_VERSION='2013d'
fmt = '%Y-%m-%d %H:%M:%S %Z%z'
NOTIME = timedelta(0)
# GMT is a tzinfo.StaticTzInfo--the class we primarily want to test--while
# UTC is the reference implementation. They both have the same timezone meaning.
UTC = pytz.timezone('UTC')
GMT = pytz.timezone('GMT')
assert isinstance(GMT, StaticTzInfo), 'GMT is no longer a StaticTzInfo'
def prettydt(dt):
"""datetime as a string using a known format.
We don't use strftime as it doesn't handle years earlier than 1900
per http://bugs.python.org/issue1777412
"""
if dt.utcoffset() >= timedelta(0):
offset = '+%s' % (dt.utcoffset(),)
else:
offset = '-%s' % (-1 * dt.utcoffset(),)
return '%04d-%02d-%02d %02d:%02d:%02d %s %s' % (
dt.year, dt.month, dt.day,
dt.hour, dt.minute, dt.second,
dt.tzname(), offset)
try:
unicode
except NameError:
# Python 3.x doesn't have unicode(), making writing code
# for Python 2.3 and Python 3.x a pain.
unicode = str
class BasicTest(unittest.TestCase):
def testVersion(self):
# Ensuring the correct version of pytz has been loaded
self.assertEqual(EXPECTED_VERSION, pytz.__version__,
'Incorrect pytz version loaded. Import path is stuffed '
'or this test needs updating. (Wanted %s, got %s)'
% (EXPECTED_VERSION, pytz.__version__)
)
def testGMT(self):
now = datetime.now(tz=GMT)
self.assertTrue(now.utcoffset() == NOTIME)
self.assertTrue(now.dst() == NOTIME)
self.assertTrue(now.timetuple() == now.utctimetuple())
self.assertTrue(now==now.replace(tzinfo=UTC))
def testReferenceUTC(self):
now = datetime.now(tz=UTC)
self.assertTrue(now.utcoffset() == NOTIME)
self.assertTrue(now.dst() == NOTIME)
self.assertTrue(now.timetuple() == now.utctimetuple())
def testUnknownOffsets(self):
# This tzinfo behavior is required to make
# datetime.time.{utcoffset, dst, tzname} work as documented.
dst_tz = pytz.timezone('US/Eastern')
# This information is not known when we don't have a date,
# so return None per API.
self.assertTrue(dst_tz.utcoffset(None) is None)
self.assertTrue(dst_tz.dst(None) is None)
# We don't know the abbreviation, but this is still a valid
# tzname per the Python documentation.
self.assertEqual(dst_tz.tzname(None), 'US/Eastern')
def clearCache(self):
pytz._tzinfo_cache.clear()
def testUnicodeTimezone(self):
# We need to ensure that cold lookups work for both Unicode
# and traditional strings, and that the desired singleton is
# returned.
self.clearCache()
eastern = pytz.timezone(unicode('US/Eastern'))
self.assertTrue(eastern is pytz.timezone('US/Eastern'))
self.clearCache()
eastern = pytz.timezone('US/Eastern')
self.assertTrue(eastern is pytz.timezone(unicode('US/Eastern')))
class PicklingTest(unittest.TestCase):
def _roundtrip_tzinfo(self, tz):
p = pickle.dumps(tz)
unpickled_tz = pickle.loads(p)
self.assertTrue(tz is unpickled_tz, '%s did not roundtrip' % tz.zone)
def _roundtrip_datetime(self, dt):
# Ensure that the tzinfo attached to a datetime instance
# is identical to the one returned. This is important for
# DST timezones, as some state is stored in the tzinfo.
tz = dt.tzinfo
p = pickle.dumps(dt)
unpickled_dt = pickle.loads(p)
unpickled_tz = unpickled_dt.tzinfo
self.assertTrue(tz is unpickled_tz, '%s did not roundtrip' % tz.zone)
def testDst(self):
tz = pytz.timezone('Europe/Amsterdam')
dt = datetime(2004, 2, 1, 0, 0, 0)
for localized_tz in list(tz._tzinfos.values()):
self._roundtrip_tzinfo(localized_tz)
self._roundtrip_datetime(dt.replace(tzinfo=localized_tz))
def testRoundtrip(self):
dt = datetime(2004, 2, 1, 0, 0, 0)
for zone in pytz.all_timezones:
tz = pytz.timezone(zone)
self._roundtrip_tzinfo(tz)
def testDatabaseFixes(self):
# Hack the pickle to make it refer to a timezone abbreviation
# that does not match anything. The unpickler should be able
# to repair this case
tz = pytz.timezone('Australia/Melbourne')
p = pickle.dumps(tz)
tzname = tz._tzname
hacked_p = p.replace(_byte_string(tzname), _byte_string('???'))
self.assertNotEqual(p, hacked_p)
unpickled_tz = pickle.loads(hacked_p)
self.assertTrue(tz is unpickled_tz)
# Simulate a database correction. In this case, the incorrect
# data will continue to be used.
p = pickle.dumps(tz)
new_utcoffset = tz._utcoffset.seconds + 42
# Python 3 introduced a new pickle protocol where numbers are stored in
# hexadecimal representation. Here we extract the pickle
# representation of the number for the current Python version.
old_pickle_pattern = pickle.dumps(tz._utcoffset.seconds)[3:-1]
new_pickle_pattern = pickle.dumps(new_utcoffset)[3:-1]
hacked_p = p.replace(old_pickle_pattern, new_pickle_pattern)
self.assertNotEqual(p, hacked_p)
unpickled_tz = pickle.loads(hacked_p)
self.assertEqual(unpickled_tz._utcoffset.seconds, new_utcoffset)
self.assertTrue(tz is not unpickled_tz)
def testOldPickles(self):
# Ensure that applications serializing pytz instances as pickles
# have no troubles upgrading to a new pytz release. These pickles
were created with pytz2006j.
east1 = pickle.loads(_byte_string(
"cpytz\n_p\np1\n(S'US/Eastern'\np2\nI-18000\n"
"I0\nS'EST'\np3\ntRp4\n."
))
east2 = pytz.timezone('US/Eastern')
self.assertTrue(east1 is east2)
# Confirm changes in name munging between 2006j and 2007c cause
# no problems.
pap1 = pickle.loads(_byte_string(
"cpytz\n_p\np1\n(S'America/Port_minus_au_minus_Prince'"
"\np2\nI-17340\nI0\nS'PPMT'\np3\ntRp4\n."))
pap2 = pytz.timezone('America/Port-au-Prince')
self.assertTrue(pap1 is pap2)
gmt1 = pickle.loads(_byte_string(
"cpytz\n_p\np1\n(S'Etc/GMT_plus_10'\np2\ntRp3\n."))
gmt2 = pytz.timezone('Etc/GMT+10')
self.assertTrue(gmt1 is gmt2)
class USEasternDSTStartTestCase(unittest.TestCase):
tzinfo = pytz.timezone('US/Eastern')
# 24 hours before DST changeover
transition_time = datetime(2002, 4, 7, 7, 0, 0, tzinfo=UTC)
# Increase for 'flexible' DST transitions due to 1 minute granularity
# of Python's datetime library
instant = timedelta(seconds=1)
# before transition
before = {
'tzname': 'EST',
'utcoffset': timedelta(hours = -5),
'dst': timedelta(hours = 0),
}
# after transition
after = {
'tzname': 'EDT',
'utcoffset': timedelta(hours = -4),
'dst': timedelta(hours = 1),
}
def _test_tzname(self, utc_dt, wanted):
tzname = wanted['tzname']
dt = utc_dt.astimezone(self.tzinfo)
self.assertEqual(dt.tzname(), tzname,
'Expected %s as tzname for %s. Got %s' % (
tzname, str(utc_dt), dt.tzname()
)
)
def _test_utcoffset(self, utc_dt, wanted):
utcoffset = wanted['utcoffset']
dt = utc_dt.astimezone(self.tzinfo)
self.assertEqual(
dt.utcoffset(), wanted['utcoffset'],
'Expected %s as utcoffset for %s. Got %s' % (
utcoffset, utc_dt, dt.utcoffset()
)
)
def _test_dst(self, utc_dt, wanted):
dst = wanted['dst']
dt = utc_dt.astimezone(self.tzinfo)
self.assertEqual(dt.dst(),dst,
'Expected %s as dst for %s. Got %s' % (
dst, utc_dt, dt.dst()
)
)
def test_arithmetic(self):
utc_dt = self.transition_time
for days in range(-420, 720, 20):
delta = timedelta(days=days)
# Make sure we can get back where we started
dt = utc_dt.astimezone(self.tzinfo)
dt2 = dt + delta
dt2 = dt2 - delta
self.assertEqual(dt, dt2)
# Make sure arithmetic crossing DST boundaries ends
# up in the correct timezone after normalization
utc_plus_delta = (utc_dt + delta).astimezone(self.tzinfo)
local_plus_delta = self.tzinfo.normalize(dt + delta)
self.assertEqual(
prettydt(utc_plus_delta),
prettydt(local_plus_delta),
'Incorrect result for delta==%d days. Wanted %r. Got %r'%(
days,
prettydt(utc_plus_delta),
prettydt(local_plus_delta),
)
)
def _test_all(self, utc_dt, wanted):
self._test_utcoffset(utc_dt, wanted)
self._test_tzname(utc_dt, wanted)
self._test_dst(utc_dt, wanted)
def testDayBefore(self):
self._test_all(
self.transition_time - timedelta(days=1), self.before
)
def testTwoHoursBefore(self):
self._test_all(
self.transition_time - timedelta(hours=2), self.before
)
def testHourBefore(self):
self._test_all(
self.transition_time - timedelta(hours=1), self.before
)
def testInstantBefore(self):
self._test_all(
self.transition_time - self.instant, self.before
)
def testTransition(self):
self._test_all(
self.transition_time, self.after
)
def testInstantAfter(self):
self._test_all(
self.transition_time + self.instant, self.after
)
def testHourAfter(self):
self._test_all(
self.transition_time + timedelta(hours=1), self.after
)
def testTwoHoursAfter(self):
self._test_all(
self.transition_time + timedelta(hours=2), self.after
)
def testDayAfter(self):
self._test_all(
self.transition_time + timedelta(days=1), self.after
)
class USEasternDSTEndTestCase(USEasternDSTStartTestCase):
tzinfo = pytz.timezone('US/Eastern')
transition_time = datetime(2002, 10, 27, 6, 0, 0, tzinfo=UTC)
before = {
'tzname': 'EDT',
'utcoffset': timedelta(hours = -4),
'dst': timedelta(hours = 1),
}
after = {
'tzname': 'EST',
'utcoffset': timedelta(hours = -5),
'dst': timedelta(hours = 0),
}
class USEasternEPTStartTestCase(USEasternDSTStartTestCase):
transition_time = datetime(1945, 8, 14, 23, 0, 0, tzinfo=UTC)
before = {
'tzname': 'EWT',
'utcoffset': timedelta(hours = -4),
'dst': timedelta(hours = 1),
}
after = {
'tzname': 'EPT',
'utcoffset': timedelta(hours = -4),
'dst': timedelta(hours = 1),
}
class USEasternEPTEndTestCase(USEasternDSTStartTestCase):
transition_time = datetime(1945, 9, 30, 6, 0, 0, tzinfo=UTC)
before = {
'tzname': 'EPT',
'utcoffset': timedelta(hours = -4),
'dst': timedelta(hours = 1),
}
after = {
'tzname': 'EST',
'utcoffset': timedelta(hours = -5),
'dst': timedelta(hours = 0),
}
class WarsawWMTEndTestCase(USEasternDSTStartTestCase):
# In 1915, Warsaw changed from Warsaw to Central European time.
# This involved the clocks being set backwards, causing an end-of-DST
# like situation without DST being involved.
tzinfo = pytz.timezone('Europe/Warsaw')
transition_time = datetime(1915, 8, 4, 22, 36, 0, tzinfo=UTC)
before = {
'tzname': 'WMT',
'utcoffset': timedelta(hours=1, minutes=24),
'dst': timedelta(0),
}
after = {
'tzname': 'CET',
'utcoffset': timedelta(hours=1),
'dst': timedelta(0),
}
class VilniusWMTEndTestCase(USEasternDSTStartTestCase):
# At the end of 1916, Vilnius changed timezones putting its clock
# forward by 11 minutes 35 seconds. Neither timezone was in DST mode.
tzinfo = pytz.timezone('Europe/Vilnius')
instant = timedelta(seconds=31)
transition_time = datetime(1916, 12, 31, 22, 36, 00, tzinfo=UTC)
before = {
'tzname': 'WMT',
'utcoffset': timedelta(hours=1, minutes=24),
'dst': timedelta(0),
}
after = {
'tzname': 'KMT',
'utcoffset': timedelta(hours=1, minutes=36), # Really 1:35:36
'dst': timedelta(0),
}
class VilniusCESTStartTestCase(USEasternDSTStartTestCase):
# In 1941, Vilnius changed from MSK to CEST, switching to summer
# time while simultaneously reducing its UTC offset by two hours,
# causing the clocks to go backwards for this summer time
# switchover.
tzinfo = pytz.timezone('Europe/Vilnius')
transition_time = datetime(1941, 6, 23, 21, 00, 00, tzinfo=UTC)
before = {
'tzname': 'MSK',
'utcoffset': timedelta(hours=3),
'dst': timedelta(0),
}
after = {
'tzname': 'CEST',
'utcoffset': timedelta(hours=2),
'dst': timedelta(hours=1),
}
class LondonHistoryStartTestCase(USEasternDSTStartTestCase):
# The first known timezone transition in London was in 1847 when
# clocks were synchronized to GMT. However, we currently only
# understand v1 format tzfile(5) files, which do not handle years
# this far in the past, so our earliest known transition is in
# 1916.
tzinfo = pytz.timezone('Europe/London')
# transition_time = datetime(1847, 12, 1, 1, 15, 00, tzinfo=UTC)
# before = {
# 'tzname': 'LMT',
# 'utcoffset': timedelta(minutes=-75),
# 'dst': timedelta(0),
# }
# after = {
# 'tzname': 'GMT',
# 'utcoffset': timedelta(0),
# 'dst': timedelta(0),
# }
transition_time = datetime(1916, 5, 21, 2, 00, 00, tzinfo=UTC)
before = {
'tzname': 'GMT',
'utcoffset': timedelta(0),
'dst': timedelta(0),
}
after = {
'tzname': 'BST',
'utcoffset': timedelta(hours=1),
'dst': timedelta(hours=1),
}
class LondonHistoryEndTestCase(USEasternDSTStartTestCase):
# Timezone switchovers are projected into the future, even
# though no official statements exist or could be believed even
# if they did exist. We currently only check the last known
# transition in 2037, as we are still using v1 format tzfile(5)
# files.
tzinfo = pytz.timezone('Europe/London')
# transition_time = datetime(2499, 10, 25, 1, 0, 0, tzinfo=UTC)
transition_time = datetime(2037, 10, 25, 1, 0, 0, tzinfo=UTC)
before = {
'tzname': 'BST',
'utcoffset': timedelta(hours=1),
'dst': timedelta(hours=1),
}
after = {
'tzname': 'GMT',
'utcoffset': timedelta(0),
'dst': timedelta(0),
}
class NoumeaHistoryStartTestCase(USEasternDSTStartTestCase):
# Noumea adopted a whole hour offset in 1912. Previously
# it was 11 hours, 5 minutes and 48 seconds off UTC. However,
# due to limitations of the Python datetime library, we need
# to round that to 11 hours 6 minutes.
tzinfo = pytz.timezone('Pacific/Noumea')
transition_time = datetime(1912, 1, 12, 12, 54, 12, tzinfo=UTC)
before = {
'tzname': 'LMT',
'utcoffset': timedelta(hours=11, minutes=6),
'dst': timedelta(0),
}
after = {
'tzname': 'NCT',
'utcoffset': timedelta(hours=11),
'dst': timedelta(0),
}
class NoumeaDSTEndTestCase(USEasternDSTStartTestCase):
# Noumea dropped DST in 1997.
tzinfo = pytz.timezone('Pacific/Noumea')
transition_time = datetime(1997, 3, 1, 15, 00, 00, tzinfo=UTC)
before = {
'tzname': 'NCST',
'utcoffset': timedelta(hours=12),
'dst': timedelta(hours=1),
}
after = {
'tzname': 'NCT',
'utcoffset': timedelta(hours=11),
'dst': timedelta(0),
}
class NoumeaNoMoreDSTTestCase(NoumeaDSTEndTestCase):
# Noumea dropped DST in 1997. Here we test that it stops occurring.
transition_time = (
NoumeaDSTEndTestCase.transition_time + timedelta(days=365*10))
before = NoumeaDSTEndTestCase.after
after = NoumeaDSTEndTestCase.after
class TahitiTestCase(USEasternDSTStartTestCase):
# Tahiti has had a single transition in its history.
tzinfo = pytz.timezone('Pacific/Tahiti')
transition_time = datetime(1912, 10, 1, 9, 58, 16, tzinfo=UTC)
before = {
'tzname': 'LMT',
'utcoffset': timedelta(hours=-9, minutes=-58),
'dst': timedelta(0),
}
after = {
'tzname': 'TAHT',
'utcoffset': timedelta(hours=-10),
'dst': timedelta(0),
}
class SamoaInternationalDateLineChange(USEasternDSTStartTestCase):
# At the end of 2011, Samoa will switch from being east of the
# international dateline to the west. There will be no Dec 30th
# 2011 and it will switch from UTC-10 to UTC+14.
tzinfo = pytz.timezone('Pacific/Apia')
transition_time = datetime(2011, 12, 30, 10, 0, 0, tzinfo=UTC)
before = {
'tzname': 'WSDT',
'utcoffset': timedelta(hours=-10),
'dst': timedelta(hours=1),
}
after = {
'tzname': 'WSDT',
'utcoffset': timedelta(hours=14),
'dst': timedelta(hours=1),
}
class ReferenceUSEasternDSTStartTestCase(USEasternDSTStartTestCase):
tzinfo = reference.Eastern
def test_arithmetic(self):
# Reference implementation cannot handle this
pass
class ReferenceUSEasternDSTEndTestCase(USEasternDSTEndTestCase):
tzinfo = reference.Eastern
def testHourBefore(self):
# Python's datetime library has a bug, where the hour before
# a daylight savings transition is one hour out. For example,
# at the end of US/Eastern daylight savings time, 01:00 EST
# occurs twice (once at 05:00 UTC and once at 06:00 UTC),
# whereas the first should actually be 01:00 EDT.
# Note that this bug is by design - by accepting this ambiguity
# for one hour per year, an is_dst flag on datetime.time
# became unnecessary.
self._test_all(
self.transition_time - timedelta(hours=1), self.after
)
def testInstantBefore(self):
self._test_all(
self.transition_time - timedelta(seconds=1), self.after
)
def test_arithmetic(self):
# Reference implementation cannot handle this
pass
class LocalTestCase(unittest.TestCase):
def testLocalize(self):
loc_tz = pytz.timezone('Europe/Amsterdam')
loc_time = loc_tz.localize(datetime(1930, 5, 10, 0, 0, 0))
# Actually +00:19:32, but Python datetime rounds this
self.assertEqual(loc_time.strftime('%Z%z'), 'AMT+0020')
loc_time = loc_tz.localize(datetime(1930, 5, 20, 0, 0, 0))
# Actually +00:19:32, but Python datetime rounds this
self.assertEqual(loc_time.strftime('%Z%z'), 'NST+0120')
loc_time = loc_tz.localize(datetime(1940, 5, 10, 0, 0, 0))
self.assertEqual(loc_time.strftime('%Z%z'), 'NET+0020')
loc_time = loc_tz.localize(datetime(1940, 5, 20, 0, 0, 0))
self.assertEqual(loc_time.strftime('%Z%z'), 'CEST+0200')
loc_time = loc_tz.localize(datetime(2004, 2, 1, 0, 0, 0))
self.assertEqual(loc_time.strftime('%Z%z'), 'CET+0100')
loc_time = loc_tz.localize(datetime(2004, 4, 1, 0, 0, 0))
self.assertEqual(loc_time.strftime('%Z%z'), 'CEST+0200')
tz = pytz.timezone('Europe/Amsterdam')
loc_time = tz.localize(datetime(1943, 3, 29, 1, 59, 59))
self.assertEqual(loc_time.strftime('%Z%z'), 'CET+0100')
# Switch to US
loc_tz = pytz.timezone('US/Eastern')
# End of DST ambiguity check
loc_time = loc_tz.localize(datetime(1918, 10, 27, 1, 59, 59), is_dst=1)
self.assertEqual(loc_time.strftime('%Z%z'), 'EDT-0400')
loc_time = loc_tz.localize(datetime(1918, 10, 27, 1, 59, 59), is_dst=0)
self.assertEqual(loc_time.strftime('%Z%z'), 'EST-0500')
self.assertRaises(pytz.AmbiguousTimeError,
loc_tz.localize, datetime(1918, 10, 27, 1, 59, 59), is_dst=None
)
# Start of DST non-existent times
loc_time = loc_tz.localize(datetime(1918, 3, 31, 2, 0, 0), is_dst=0)
self.assertEqual(loc_time.strftime('%Z%z'), 'EST-0500')
loc_time = loc_tz.localize(datetime(1918, 3, 31, 2, 0, 0), is_dst=1)
self.assertEqual(loc_time.strftime('%Z%z'), 'EDT-0400')
self.assertRaises(pytz.NonExistentTimeError,
loc_tz.localize, datetime(1918, 3, 31, 2, 0, 0), is_dst=None
)
# Weird changes - war time and peace time both is_dst==True
loc_time = loc_tz.localize(datetime(1942, 2, 9, 3, 0, 0))
self.assertEqual(loc_time.strftime('%Z%z'), 'EWT-0400')
loc_time = loc_tz.localize(datetime(1945, 8, 14, 19, 0, 0))
self.assertEqual(loc_time.strftime('%Z%z'), 'EPT-0400')
loc_time = loc_tz.localize(datetime(1945, 9, 30, 1, 0, 0), is_dst=1)
self.assertEqual(loc_time.strftime('%Z%z'), 'EPT-0400')
loc_time = loc_tz.localize(datetime(1945, 9, 30, 1, 0, 0), is_dst=0)
self.assertEqual(loc_time.strftime('%Z%z'), 'EST-0500')
def testNormalize(self):
tz = pytz.timezone('US/Eastern')
dt = datetime(2004, 4, 4, 7, 0, 0, tzinfo=UTC).astimezone(tz)
dt2 = dt - timedelta(minutes=10)
self.assertEqual(
dt2.strftime('%Y-%m-%d %H:%M:%S %Z%z'),
'2004-04-04 02:50:00 EDT-0400'
)
dt2 = tz.normalize(dt2)
self.assertEqual(
dt2.strftime('%Y-%m-%d %H:%M:%S %Z%z'),
'2004-04-04 01:50:00 EST-0500'
)
def testPartialMinuteOffsets(self):
# utcoffset in Amsterdam was not a whole minute until 1937
# However, we fudge this by rounding them, as the Python
# datetime library cannot represent sub-minute offsets.
tz = pytz.timezone('Europe/Amsterdam')
utc_dt = datetime(1914, 1, 1, 13, 40, 28, tzinfo=UTC) # correct
utc_dt = utc_dt.replace(second=0) # But we need to fudge it
loc_dt = utc_dt.astimezone(tz)
self.assertEqual(
loc_dt.strftime('%Y-%m-%d %H:%M:%S %Z%z'),
'1914-01-01 14:00:00 AMT+0020'
)
# And get back...
utc_dt = loc_dt.astimezone(UTC)
self.assertEqual(
utc_dt.strftime('%Y-%m-%d %H:%M:%S %Z%z'),
'1914-01-01 13:40:00 UTC+0000'
)
def no_testCreateLocaltime(self):
# It would be nice if this worked, but it doesn't.
tz = pytz.timezone('Europe/Amsterdam')
dt = datetime(2004, 10, 31, 2, 0, 0, tzinfo=tz)
self.assertEqual(
dt.strftime(fmt),
'2004-10-31 02:00:00 CET+0100'
)
class CommonTimezonesTestCase(unittest.TestCase):
def test_bratislava(self):
# Bratislava is the default timezone for Slovakia, but our
# heuristics were not adding it to common_timezones. Ideally,
# common_timezones should be populated from zone.tab at runtime,
# but I'm hesitant to pay the startup cost as loading the list
# on demand whilst remaining backwards compatible seems
# difficult.
self.assertTrue('Europe/Bratislava' in pytz.common_timezones)
self.assertTrue('Europe/Bratislava' in pytz.common_timezones_set)
def test_us_eastern(self):
self.assertTrue('US/Eastern' in pytz.common_timezones)
self.assertTrue('US/Eastern' in pytz.common_timezones_set)
def test_belfast(self):
# Belfast uses London time.
self.assertTrue('Europe/Belfast' in pytz.all_timezones_set)
self.assertFalse('Europe/Belfast' in pytz.common_timezones)
self.assertFalse('Europe/Belfast' in pytz.common_timezones_set)
class BaseTzInfoTestCase:
'''Ensure UTC, StaticTzInfo and DstTzInfo work consistently.
These tests are run for each type of tzinfo.
'''
tz = None # override
tz_class = None # override
def test_expectedclass(self):
self.assertTrue(isinstance(self.tz, self.tz_class))
def test_fromutc(self):
# naive datetime.
dt1 = datetime(2011, 10, 31)
# localized datetime, same timezone.
dt2 = self.tz.localize(dt1)
# Both should give the same results. Note that the standard
# Python tzinfo.fromutc() only supports the second.
for dt in [dt1, dt2]:
loc_dt = self.tz.fromutc(dt)
loc_dt2 = pytz.utc.localize(dt1).astimezone(self.tz)
self.assertEqual(loc_dt, loc_dt2)
# localized datetime, different timezone.
new_tz = pytz.timezone('Europe/Paris')
self.assertTrue(self.tz is not new_tz)
dt3 = new_tz.localize(dt1)
self.assertRaises(ValueError, self.tz.fromutc, dt3)
def test_normalize(self):
other_tz = pytz.timezone('Europe/Paris')
self.assertTrue(self.tz is not other_tz)
dt = datetime(2012, 3, 26, 12, 0)
other_dt = other_tz.localize(dt)
local_dt = self.tz.normalize(other_dt)
self.assertTrue(local_dt.tzinfo is not other_dt.tzinfo)
self.assertNotEqual(
local_dt.replace(tzinfo=None), other_dt.replace(tzinfo=None))
def test_astimezone(self):
other_tz = pytz.timezone('Europe/Paris')
self.assertTrue(self.tz is not other_tz)
dt = datetime(2012, 3, 26, 12, 0)
other_dt = other_tz.localize(dt)
local_dt = other_dt.astimezone(self.tz)
self.assertTrue(local_dt.tzinfo is not other_dt.tzinfo)
self.assertNotEqual(
local_dt.replace(tzinfo=None), other_dt.replace(tzinfo=None))
class OptimizedUTCTestCase(unittest.TestCase, BaseTzInfoTestCase):
tz = pytz.utc
tz_class = tz.__class__
class LegacyUTCTestCase(unittest.TestCase, BaseTzInfoTestCase):
# Deprecated timezone, but useful for comparison tests.
tz = pytz.timezone('Etc/UTC')
tz_class = StaticTzInfo
class StaticTzInfoTestCase(unittest.TestCase, BaseTzInfoTestCase):
tz = pytz.timezone('GMT')
tz_class = StaticTzInfo
class DstTzInfoTestCase(unittest.TestCase, BaseTzInfoTestCase):
tz = pytz.timezone('Australia/Melbourne')
tz_class = DstTzInfo
def test_suite():
suite = unittest.TestSuite()
suite.addTest(doctest.DocTestSuite('pytz'))
suite.addTest(doctest.DocTestSuite('pytz.tzinfo'))
import test_tzinfo
suite.addTest(unittest.defaultTestLoader.loadTestsFromModule(test_tzinfo))
return suite
if __name__ == '__main__':
warnings.simplefilter("error") # Warnings should be fatal in tests.
unittest.main(defaultTest='test_suite')

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python
'''
$Id: tzfile.py,v 1.8 2004/06/03 00:15:24 zenzen Exp $
'''
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
from datetime import datetime, timedelta
from struct import unpack, calcsize
from pytz.tzinfo import StaticTzInfo, DstTzInfo, memorized_ttinfo
from pytz.tzinfo import memorized_datetime, memorized_timedelta
def _byte_string(s):
"""Cast a string or byte string to an ASCII byte string."""
return s.encode('US-ASCII')
_NULL = _byte_string('\0')
def _std_string(s):
"""Cast a string or byte string to an ASCII string."""
return str(s.decode('US-ASCII'))
def build_tzinfo(zone, fp):
head_fmt = '>4s c 15x 6l'
head_size = calcsize(head_fmt)
(magic, format, ttisgmtcnt, ttisstdcnt, leapcnt, timecnt,
typecnt, charcnt) = unpack(head_fmt, fp.read(head_size))
# Make sure it is a tzfile(5) file
assert magic == _byte_string('TZif'), 'Got magic %s' % repr(magic)
# Read out the transition times, localtime indices and ttinfo structures.
data_fmt = '>%(timecnt)dl %(timecnt)dB %(ttinfo)s %(charcnt)ds' % dict(
timecnt=timecnt, ttinfo='lBB'*typecnt, charcnt=charcnt)
data_size = calcsize(data_fmt)
data = unpack(data_fmt, fp.read(data_size))
# make sure we unpacked the right number of values
assert len(data) == 2 * timecnt + 3 * typecnt + 1
transitions = [memorized_datetime(trans)
for trans in data[:timecnt]]
lindexes = list(data[timecnt:2 * timecnt])
ttinfo_raw = data[2 * timecnt:-1]
tznames_raw = data[-1]
del data
# Process ttinfo into separate structs
ttinfo = []
tznames = {}
i = 0
while i < len(ttinfo_raw):
# have we looked up this timezone name yet?
tzname_offset = ttinfo_raw[i+2]
if tzname_offset not in tznames:
nul = tznames_raw.find(_NULL, tzname_offset)
if nul < 0:
nul = len(tznames_raw)
tznames[tzname_offset] = _std_string(
tznames_raw[tzname_offset:nul])
ttinfo.append((ttinfo_raw[i],
bool(ttinfo_raw[i+1]),
tznames[tzname_offset]))
i += 3
# Now build the timezone object
if len(transitions) == 0:
ttinfo[0][0], ttinfo[0][2]  # raises IndexError early if ttinfo is empty
cls = type(zone, (StaticTzInfo,), dict(
zone=zone,
_utcoffset=memorized_timedelta(ttinfo[0][0]),
_tzname=ttinfo[0][2]))
else:
# Early dates use the first standard time ttinfo
i = 0
while ttinfo[i][1]:
i += 1
if ttinfo[i] == ttinfo[lindexes[0]]:
transitions[0] = datetime.min
else:
transitions.insert(0, datetime.min)
lindexes.insert(0, i)
# calculate transition info
transition_info = []
for i in range(len(transitions)):
inf = ttinfo[lindexes[i]]
utcoffset = inf[0]
if not inf[1]:
dst = 0
else:
for j in range(i-1, -1, -1):
prev_inf = ttinfo[lindexes[j]]
if not prev_inf[1]:
break
dst = inf[0] - prev_inf[0] # dst offset
# Bad dst? Look further. DST > 24 hours happens when
# a timezone has moved across the international dateline.
if dst <= 0 or dst > 3600*3:
for j in range(i+1, len(transitions)):
stdinf = ttinfo[lindexes[j]]
if not stdinf[1]:
dst = inf[0] - stdinf[0]
if dst > 0:
break # Found a useful std time.
tzname = inf[2]
# Round utcoffset and dst to the nearest minute or the
# datetime library will complain. Conversions to these timezones
# might be up to plus or minus 30 seconds out, but it is
# the best we can do.
utcoffset = int((utcoffset + 30) // 60) * 60
dst = int((dst + 30) // 60) * 60
transition_info.append(memorized_ttinfo(utcoffset, dst, tzname))
cls = type(zone, (DstTzInfo,), dict(
zone=zone,
_utc_transition_times=transitions,
_transition_info=transition_info))
return cls()
if __name__ == '__main__':
import os.path
from pprint import pprint
base = os.path.join(os.path.dirname(__file__), 'zoneinfo')
tz = build_tzinfo('Australia/Melbourne',
open(os.path.join(base,'Australia','Melbourne'), 'rb'))
tz = build_tzinfo('US/Eastern',
open(os.path.join(base,'US','Eastern'), 'rb'))
pprint(tz._utc_transition_times)
#print tz.asPython(4)
#print tz.transitions_mapping
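Outside the package, build_tzinfo() can also be smoke-tested against the system tz database; the path below is an assumption (typical on Linux, where tzfile(5) data lives under /usr/share/zoneinfo):

from datetime import datetime

with open('/usr/share/zoneinfo/Australia/Melbourne', 'rb') as fp:
    melb = build_tzinfo('Australia/Melbourne', fp)

# January is summer in Melbourne: AEDT, UTC+11.
print(melb.utcoffset(datetime(2004, 1, 10, 12, 0)))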

View File

@@ -0,0 +1,563 @@
'''Base classes and helpers for building zone specific tzinfo classes'''
from datetime import datetime, timedelta, tzinfo
from bisect import bisect_right
try:
set
except NameError:
from sets import Set as set
import pytz
from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError
__all__ = []
_timedelta_cache = {}
def memorized_timedelta(seconds):
'''Create only one instance of each distinct timedelta'''
try:
return _timedelta_cache[seconds]
except KeyError:
delta = timedelta(seconds=seconds)
_timedelta_cache[seconds] = delta
return delta
_epoch = datetime.utcfromtimestamp(0)
_datetime_cache = {0: _epoch}
def memorized_datetime(seconds):
'''Create only one instance of each distinct datetime'''
try:
return _datetime_cache[seconds]
except KeyError:
# NB. We can't just do datetime.utcfromtimestamp(seconds) as this
# fails with negative values under Windows (Bug #90096)
dt = _epoch + timedelta(seconds=seconds)
_datetime_cache[seconds] = dt
return dt
_ttinfo_cache = {}
def memorized_ttinfo(*args):
'''Create only one instance of each distinct tuple'''
try:
return _ttinfo_cache[args]
except KeyError:
ttinfo = (
memorized_timedelta(args[0]),
memorized_timedelta(args[1]),
args[2]
)
_ttinfo_cache[args] = ttinfo
return ttinfo
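A tiny check of the caching contract stated in the docstrings above: equal inputs return the identical cached object, so offsets can later be compared cheaply by identity.

assert memorized_timedelta(3600) is memorized_timedelta(3600)
assert memorized_datetime(0) is memorized_datetime(0)
assert memorized_ttinfo(3600, 0, 'CET') is memorized_ttinfo(3600, 0, 'CET')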
_notime = memorized_timedelta(0)
def _to_seconds(td):
'''Convert a timedelta to seconds'''
return td.seconds + td.days * 24 * 60 * 60
class BaseTzInfo(tzinfo):
# Overridden in subclass
_utcoffset = None
_tzname = None
zone = None
def __str__(self):
return self.zone
class StaticTzInfo(BaseTzInfo):
'''A timezone that has a constant offset from UTC
These timezones are rare, as most locations have changed their
offset at some point in their history
'''
def fromutc(self, dt):
'''See datetime.tzinfo.fromutc'''
if dt.tzinfo is not None and dt.tzinfo is not self:
raise ValueError('fromutc: dt.tzinfo is not self')
return (dt + self._utcoffset).replace(tzinfo=self)
def utcoffset(self, dt, is_dst=None):
'''See datetime.tzinfo.utcoffset
is_dst is ignored for StaticTzInfo, and exists only to
retain compatibility with DstTzInfo.
'''
return self._utcoffset
def dst(self, dt, is_dst=None):
'''See datetime.tzinfo.dst
is_dst is ignored for StaticTzInfo, and exists only to
retain compatibility with DstTzInfo.
'''
return _notime
def tzname(self, dt, is_dst=None):
'''See datetime.tzinfo.tzname
is_dst is ignored for StaticTzInfo, and exists only to
retain compatibility with DstTzInfo.
'''
return self._tzname
def localize(self, dt, is_dst=False):
'''Convert naive time to local time'''
if dt.tzinfo is not None:
raise ValueError('Not naive datetime (tzinfo is already set)')
return dt.replace(tzinfo=self)
def normalize(self, dt, is_dst=False):
'''Correct the timezone information on the given datetime.
This is normally a no-op, as StaticTzInfo timezones never have
ambiguous cases to correct:
>>> from pytz import timezone
>>> gmt = timezone('GMT')
>>> isinstance(gmt, StaticTzInfo)
True
>>> dt = datetime(2011, 5, 8, 1, 2, 3, tzinfo=gmt)
>>> gmt.normalize(dt) is dt
True
The supported method of converting between timezones is to use
datetime.astimezone(). Currently normalize() also works:
>>> la = timezone('America/Los_Angeles')
>>> dt = la.localize(datetime(2011, 5, 7, 1, 2, 3))
>>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
>>> gmt.normalize(dt).strftime(fmt)
'2011-05-07 08:02:03 GMT (+0000)'
'''
if dt.tzinfo is self:
return dt
if dt.tzinfo is None:
raise ValueError('Naive time - no tzinfo set')
return dt.astimezone(self)
def __repr__(self):
return '<StaticTzInfo %r>' % (self.zone,)
def __reduce__(self):
# Special pickle so the zone remains a singleton and to cope with
# database changes.
return pytz._p, (self.zone,)
class DstTzInfo(BaseTzInfo):
'''A timezone that has a variable offset from UTC
The offset might change if daylight savings time comes into effect,
or at a point in history when the region decides to change their
timezone definition.
'''
# Overridden in subclass
_utc_transition_times = None # Sorted list of DST transition times in UTC
_transition_info = None # [(utcoffset, dstoffset, tzname)] corresponding
# to _utc_transition_times entries
zone = None
# Set in __init__
_tzinfos = None
_dst = None # DST offset
def __init__(self, _inf=None, _tzinfos=None):
if _inf:
self._tzinfos = _tzinfos
self._utcoffset, self._dst, self._tzname = _inf
else:
_tzinfos = {}
self._tzinfos = _tzinfos
self._utcoffset, self._dst, self._tzname = self._transition_info[0]
_tzinfos[self._transition_info[0]] = self
for inf in self._transition_info[1:]:
if inf not in _tzinfos:
_tzinfos[inf] = self.__class__(inf, _tzinfos)
def fromutc(self, dt):
'''See datetime.tzinfo.fromutc'''
if (dt.tzinfo is not None
and getattr(dt.tzinfo, '_tzinfos', None) is not self._tzinfos):
raise ValueError('fromutc: dt.tzinfo is not self')
dt = dt.replace(tzinfo=None)
idx = max(0, bisect_right(self._utc_transition_times, dt) - 1)
inf = self._transition_info[idx]
return (dt + inf[0]).replace(tzinfo=self._tzinfos[inf])
def normalize(self, dt):
'''Correct the timezone information on the given datetime
If date arithmetic crosses DST boundaries, the tzinfo
is not magically adjusted. This method normalizes the
tzinfo to the correct one.
To test, first we need to do some setup
>>> from pytz import timezone
>>> utc = timezone('UTC')
>>> eastern = timezone('US/Eastern')
>>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
We next create a datetime right on an end-of-DST transition point,
the instant when the wallclocks are wound back one hour.
>>> utc_dt = datetime(2002, 10, 27, 6, 0, 0, tzinfo=utc)
>>> loc_dt = utc_dt.astimezone(eastern)
>>> loc_dt.strftime(fmt)
'2002-10-27 01:00:00 EST (-0500)'
Now, if we subtract a few minutes from it, note that the timezone
information has not changed.
>>> before = loc_dt - timedelta(minutes=10)
>>> before.strftime(fmt)
'2002-10-27 00:50:00 EST (-0500)'
But we can fix that by calling the normalize method
>>> before = eastern.normalize(before)
>>> before.strftime(fmt)
'2002-10-27 01:50:00 EDT (-0400)'
The supported method of converting between timezones is to use
datetime.astimezone(). Currently, normalize() also works:
>>> th = timezone('Asia/Bangkok')
>>> am = timezone('Europe/Amsterdam')
>>> dt = th.localize(datetime(2011, 5, 7, 1, 2, 3))
>>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
>>> am.normalize(dt).strftime(fmt)
'2011-05-06 20:02:03 CEST (+0200)'
'''
if dt.tzinfo is None:
raise ValueError('Naive time - no tzinfo set')
# Convert dt in localtime to UTC
offset = dt.tzinfo._utcoffset
dt = dt.replace(tzinfo=None)
dt = dt - offset
# convert it back, and return it
return self.fromutc(dt)
def localize(self, dt, is_dst=False):
'''Convert naive time to local time.
This method should be used to construct localtimes, rather
than passing a tzinfo argument to a datetime constructor.
is_dst is used to determine the correct timezone in the ambiguous
period at the end of daylight savings time.
>>> from pytz import timezone
>>> fmt = '%Y-%m-%d %H:%M:%S %Z (%z)'
>>> amdam = timezone('Europe/Amsterdam')
>>> dt = datetime(2004, 10, 31, 2, 0, 0)
>>> loc_dt1 = amdam.localize(dt, is_dst=True)
>>> loc_dt2 = amdam.localize(dt, is_dst=False)
>>> loc_dt1.strftime(fmt)
'2004-10-31 02:00:00 CEST (+0200)'
>>> loc_dt2.strftime(fmt)
'2004-10-31 02:00:00 CET (+0100)'
>>> str(loc_dt2 - loc_dt1)
'1:00:00'
Use is_dst=None to raise an AmbiguousTimeError for ambiguous
times at the end of daylight savings
>>> try:
... loc_dt1 = amdam.localize(dt, is_dst=None)
... except AmbiguousTimeError:
... print('Ambiguous')
Ambiguous
is_dst defaults to False
>>> amdam.localize(dt) == amdam.localize(dt, False)
True
is_dst is also used to determine the correct timezone in the
wallclock times jumped over at the start of daylight savings time.
>>> pacific = timezone('US/Pacific')
>>> dt = datetime(2008, 3, 9, 2, 0, 0)
>>> ploc_dt1 = pacific.localize(dt, is_dst=True)
>>> ploc_dt2 = pacific.localize(dt, is_dst=False)
>>> ploc_dt1.strftime(fmt)
'2008-03-09 02:00:00 PDT (-0700)'
>>> ploc_dt2.strftime(fmt)
'2008-03-09 02:00:00 PST (-0800)'
>>> str(ploc_dt2 - ploc_dt1)
'1:00:00'
Use is_dst=None to raise a NonExistentTimeError for these skipped
times.
>>> try:
... loc_dt1 = pacific.localize(dt, is_dst=None)
... except NonExistentTimeError:
... print('Non-existent')
Non-existent
'''
if dt.tzinfo is not None:
raise ValueError('Not naive datetime (tzinfo is already set)')
# Find the two best possibilities.
possible_loc_dt = set()
for delta in [timedelta(days=-1), timedelta(days=1)]:
loc_dt = dt + delta
idx = max(0, bisect_right(
self._utc_transition_times, loc_dt) - 1)
inf = self._transition_info[idx]
tzinfo = self._tzinfos[inf]
loc_dt = tzinfo.normalize(dt.replace(tzinfo=tzinfo))
if loc_dt.replace(tzinfo=None) == dt:
possible_loc_dt.add(loc_dt)
if len(possible_loc_dt) == 1:
return possible_loc_dt.pop()
# If there are no possibly correct timezones, we are attempting
# to convert a time that never happened - the time period jumped
# during the start-of-DST transition period.
if len(possible_loc_dt) == 0:
# If we refuse to guess, raise an exception.
if is_dst is None:
raise NonExistentTimeError(dt)
# If we are forcing the pre-DST side of the DST transition, we
# obtain the correct timezone by winding the clock forward a few
# hours.
elif is_dst:
return self.localize(
dt + timedelta(hours=6), is_dst=True) - timedelta(hours=6)
# If we are forcing the post-DST side of the DST transition, we
# obtain the correct timezone by winding the clock back.
else:
return self.localize(
dt - timedelta(hours=6), is_dst=False) + timedelta(hours=6)
# If we get this far, we have multiple possible timezones - this
# is an ambiguous case occurring during the end-of-DST transition.
# If told to be strict, raise an exception since we have an
# ambiguous case
if is_dst is None:
raise AmbiguousTimeError(dt)
# Filter out the possibilities that don't match the requested
# is_dst
filtered_possible_loc_dt = [
p for p in possible_loc_dt
if bool(p.tzinfo._dst) == is_dst
]
# Hopefully we only have one possibility left. Return it.
if len(filtered_possible_loc_dt) == 1:
return filtered_possible_loc_dt[0]
if len(filtered_possible_loc_dt) == 0:
filtered_possible_loc_dt = list(possible_loc_dt)
# If we get this far, we are in a weird timezone transition
# where the clocks have been wound back but is_dst is the same
# in both (e.g. Europe/Warsaw 1915 when they switched to CET).
# At this point, we just have to guess unless we allow more
# hints to be passed in (such as the UTC offset or abbreviation),
# but that is just getting silly.
#
# Choose the earliest (by UTC) applicable timezone.
sorting_keys = {}
for local_dt in filtered_possible_loc_dt:
key = local_dt.replace(tzinfo=None) - local_dt.tzinfo._utcoffset
sorting_keys[key] = local_dt
first_key = sorted(sorting_keys)[0]
return sorting_keys[first_key]
def utcoffset(self, dt, is_dst=None):
'''See datetime.tzinfo.utcoffset
The is_dst parameter may be used to remove ambiguity during DST
transitions.
>>> from pytz import timezone
>>> tz = timezone('America/St_Johns')
>>> ambiguous = datetime(2009, 10, 31, 23, 30)
>>> tz.utcoffset(ambiguous, is_dst=False)
datetime.timedelta(-1, 73800)
>>> tz.utcoffset(ambiguous, is_dst=True)
datetime.timedelta(-1, 77400)
>>> try:
... tz.utcoffset(ambiguous)
... except AmbiguousTimeError:
... print('Ambiguous')
Ambiguous
'''
if dt is None:
return None
elif dt.tzinfo is not self:
dt = self.localize(dt, is_dst)
return dt.tzinfo._utcoffset
else:
return self._utcoffset
def dst(self, dt, is_dst=None):
'''See datetime.tzinfo.dst
The is_dst parameter may be used to remove ambiguity during DST
transitions.
>>> from pytz import timezone
>>> tz = timezone('America/St_Johns')
>>> normal = datetime(2009, 9, 1)
>>> tz.dst(normal)
datetime.timedelta(0, 3600)
>>> tz.dst(normal, is_dst=False)
datetime.timedelta(0, 3600)
>>> tz.dst(normal, is_dst=True)
datetime.timedelta(0, 3600)
>>> ambiguous = datetime(2009, 10, 31, 23, 30)
>>> tz.dst(ambiguous, is_dst=False)
datetime.timedelta(0)
>>> tz.dst(ambiguous, is_dst=True)
datetime.timedelta(0, 3600)
>>> try:
... tz.dst(ambiguous)
... except AmbiguousTimeError:
... print('Ambiguous')
Ambiguous
'''
if dt is None:
return None
elif dt.tzinfo is not self:
dt = self.localize(dt, is_dst)
return dt.tzinfo._dst
else:
return self._dst
def tzname(self, dt, is_dst=None):
'''See datetime.tzinfo.tzname
The is_dst parameter may be used to remove ambiguity during DST
transitions.
>>> from pytz import timezone
>>> tz = timezone('America/St_Johns')
>>> normal = datetime(2009, 9, 1)
>>> tz.tzname(normal)
'NDT'
>>> tz.tzname(normal, is_dst=False)
'NDT'
>>> tz.tzname(normal, is_dst=True)
'NDT'
>>> ambiguous = datetime(2009, 10, 31, 23, 30)
>>> tz.tzname(ambiguous, is_dst=False)
'NST'
>>> tz.tzname(ambiguous, is_dst=True)
'NDT'
>>> try:
... tz.tzname(ambiguous)
... except AmbiguousTimeError:
... print('Ambiguous')
Ambiguous
'''
if dt is None:
return self.zone
elif dt.tzinfo is not self:
dt = self.localize(dt, is_dst)
return dt.tzinfo._tzname
else:
return self._tzname
def __repr__(self):
if self._dst:
dst = 'DST'
else:
dst = 'STD'
if self._utcoffset > _notime:
return '<DstTzInfo %r %s+%s %s>' % (
self.zone, self._tzname, self._utcoffset, dst
)
else:
return '<DstTzInfo %r %s%s %s>' % (
self.zone, self._tzname, self._utcoffset, dst
)
def __reduce__(self):
# Special pickle so the zone remains a singleton and to cope with
# database changes.
return pytz._p, (
self.zone,
_to_seconds(self._utcoffset),
_to_seconds(self._dst),
self._tzname
)
def unpickler(zone, utcoffset=None, dstoffset=None, tzname=None):
"""Factory function for unpickling pytz tzinfo instances.
This is shared for both StaticTzInfo and DstTzInfo instances, because
database changes could cause a zone's implementation to switch between
these two base classes and we can't break pickles on a pytz version
upgrade.
"""
# Raises a KeyError if zone no longer exists, which should never happen
# and would be a bug.
tz = pytz.timezone(zone)
# A StaticTzInfo - just return it
if utcoffset is None:
return tz
# This pickle was created from a DstTzInfo. We need to
# determine which of the list of tzinfo instances for this zone
# to use in order to restore the state of any datetime instances using
# it correctly.
utcoffset = memorized_timedelta(utcoffset)
dstoffset = memorized_timedelta(dstoffset)
try:
return tz._tzinfos[(utcoffset, dstoffset, tzname)]
except KeyError:
# The particular state requested in this timezone no longer exists.
# This indicates a corrupt pickle, or the timezone database has been
# corrected violently enough to make this particular
# (utcoffset,dstoffset) no longer exist in the zone, or the
# abbreviation has been changed.
pass
# See if we can find an entry differing only by tzname. Abbreviations
# get changed from the initial guess by the database maintainers to
# match reality when this information is discovered.
for localized_tz in list(tz._tzinfos.values()):
if (localized_tz._utcoffset == utcoffset
and localized_tz._dst == dstoffset):
return localized_tz
# This (utcoffset, dstoffset) information has been removed from the
# zone. Add it back. This might occur when the database maintainers have
# corrected incorrect information. datetime instances using this
# incorrect information will continue to do so, exactly as they were
# before being pickled. This is purely an overly paranoid safety net - I
doubt this will ever be needed in real life.
inf = (utcoffset, dstoffset, tzname)
tz._tzinfos[inf] = tz.__class__(inf, tz._tzinfos)
return tz._tzinfos[inf]
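The net effect of unpickler(), sketched below: a pickled pytz tzinfo resolves back to the shared singleton for its zone, which is exactly what the pickling tests earlier in this commit rely on.

import pickle
import pytz

tz = pytz.timezone('US/Eastern')
assert pickle.loads(pickle.dumps(tz)) is tz  # same singleton instance back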

View File

@@ -0,0 +1,547 @@
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
interchange format.
:mod:`simplejson` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.4 and Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
extension for speedups.
Encoding basic Python object hierarchies::
>>> import simplejson as json
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
>>> print(json.dumps("\"foo\bar"))
"\"foo\bar"
>>> print(json.dumps(u'\u1234'))
"\u1234"
>>> print(json.dumps('\\'))
"\\"
>>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
{"a": 0, "b": 0, "c": 0}
>>> from simplejson.compat import StringIO
>>> io = StringIO()
>>> json.dump(['streaming API'], io)
>>> io.getvalue()
'["streaming API"]'
Compact encoding::
>>> import simplejson as json
>>> obj = [1,2,3,{'4': 5, '6': 7}]
>>> json.dumps(obj, separators=(',',':'), sort_keys=True)
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
>>> import simplejson as json
>>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' '))
{
"4": 5,
"6": 7
}
Decoding JSON::
>>> import simplejson as json
>>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
True
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True
>>> from simplejson.compat import StringIO
>>> io = StringIO('["streaming API"]')
>>> json.load(io)[0] == 'streaming API'
True
Specializing JSON object decoding::
>>> import simplejson as json
>>> def as_complex(dct):
... if '__complex__' in dct:
... return complex(dct['real'], dct['imag'])
... return dct
...
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex)
(1+2j)
>>> from decimal import Decimal
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
True
Specializing JSON object encoding::
>>> import simplejson as json
>>> def encode_complex(obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
... raise TypeError(repr(obj) + " is not JSON serializable")
...
>>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]'
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
'[2.0, 1.0]'
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
'[2.0, 1.0]'
Using simplejson.tool from the shell to validate and pretty-print::
$ echo '{"json":"obj"}' | python -m simplejson.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
Expecting property name: line 1 column 3 (char 2)
"""
__version__ = '3.3.0'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
'OrderedDict', 'simple_first',
]
__author__ = 'Bob Ippolito <bob@redivi.com>'
from decimal import Decimal
from .scanner import JSONDecodeError
from .decoder import JSONDecoder
from .encoder import JSONEncoder, JSONEncoderForHTML
def _import_OrderedDict():
import collections
try:
return collections.OrderedDict
except AttributeError:
from . import ordered_dict
return ordered_dict.OrderedDict
OrderedDict = _import_OrderedDict()
def _import_c_make_encoder():
try:
from ._speedups import make_encoder
return make_encoder
except ImportError:
return None
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
If *skipkeys* is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If *ensure_ascii* is false, then some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error.
If *check_circular* is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the original JSON specification, instead of using
the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
*ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If specified, *separators* should be an
``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
compact JSON representation, you should specify ``(',', ':')`` to eliminate
whitespace.
*encoding* is the character encoding for str instances, default is UTF-8.
*default(obj)* is a function that should return a serializable version
of obj or raise ``TypeError``. The default simply raises ``TypeError``.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
If *namedtuple_as_object* is true (default: ``True``),
:class:`tuple` subclasses with ``_asdict()`` methods will be encoded
as JSON objects.
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
    rounding that happens in JavaScript otherwise. Note that this is still a
lossy operation that will not round-trip correctly and should be used
sparingly.
If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precedence over
*sort_keys*.
If *sort_keys* is true (default: ``False``), the output of dictionaries
will be sorted by item.
If *for_json* is true (default: ``False``), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
``null`` in compliance with the ECMA-262 specification. If true, this will
override *allow_nan*.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead
of subclassing whenever possible.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not item_sort_key
and not for_json and not ignore_nan and not kw):
iterable = _default_encoder.iterencode(obj)
else:
if cls is None:
cls = JSONEncoder
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding,
default=default, use_decimal=use_decimal,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
bigint_as_string=bigint_as_string,
sort_keys=sort_keys,
item_sort_key=item_sort_key,
for_json=for_json,
ignore_nan=ignore_nan,
**kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at
# a debuggability cost
for chunk in iterable:
fp.write(chunk)
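# Illustration only (not part of the vendored module): dump() writes to any
# .write()-supporting object; StringIO below is just a convenient sink for
# the example.
#
#     >>> from simplejson.compat import StringIO
#     >>> sio = StringIO()
#     >>> dump({'a': 1}, sio, separators=(',', ':'))
#     >>> sio.getvalue()
#     '{"a":1}'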
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
    If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the return value will be a
``unicode`` instance subject to normal Python ``str`` to ``unicode``
coercion rules instead of being escaped to an ASCII ``str``.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance with the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If specified, ``separators`` should be an
``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
compact JSON representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``encoding`` is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
If *namedtuple_as_object* is true (default: ``True``),
:class:`tuple` subclasses with ``_asdict()`` methods will be encoded
as JSON objects.
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *bigint_as_string* is true (not the default), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
    rounding that happens in JavaScript otherwise.
If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
    in alphabetical order by key. This option takes precedence over
*sort_keys*.
If *sort_keys* is true (default: ``False``), the output of dictionaries
will be sorted by item.
If *for_json* is true (default: ``False``), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
``null`` in compliance with the ECMA-262 specification. If true, this will
override *allow_nan*.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
whenever possible.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not sort_keys
and not item_sort_key and not for_json
and not ignore_nan and not kw):
return _default_encoder.encode(obj)
if cls is None:
cls = JSONEncoder
return cls(
skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding, default=default,
use_decimal=use_decimal,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
bigint_as_string=bigint_as_string,
sort_keys=sort_keys,
item_sort_key=item_sort_key,
for_json=for_json,
ignore_nan=ignore_nan,
**kw).encode(obj)
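# Illustration only (not part of the vendored module): sort_keys gives
# deterministic output, handy for regression tests and diffs.
#
#     >>> dumps({'b': 2, 'a': 1}, sort_keys=True)
#     '{"a": 1, "b": 2}'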
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
**kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
    Note that currently only encodings that are a superset of ASCII work;
strings of other encodings should be passed in as :class:`unicode`.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with
the result of any object literal decode with an ordered list of pairs.
The return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
"""
return loads(fp.read(),
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
use_decimal=use_decimal, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object.
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
    Note that currently only encodings that are a superset of ASCII work;
strings of other encodings should be passed in as :class:`unicode`.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with
the result of any object literal decode with an ordered list of pairs.
The return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
"""
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None
and not use_decimal and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
if object_hook is not None:
kw['object_hook'] = object_hook
if object_pairs_hook is not None:
kw['object_pairs_hook'] = object_pairs_hook
if parse_float is not None:
kw['parse_float'] = parse_float
if parse_int is not None:
kw['parse_int'] = parse_int
if parse_constant is not None:
kw['parse_constant'] = parse_constant
if use_decimal:
if parse_float is not None:
raise TypeError("use_decimal=True implies parse_float=Decimal")
kw['parse_float'] = Decimal
return cls(encoding=encoding, **kw).decode(s)
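# Illustration only (not part of the vendored module): use_decimal=True is
# shorthand for parse_float=Decimal, mirroring the encoder's use_decimal
# default so Decimal values round-trip losslessly.
#
#     >>> loads('1.1', use_decimal=True) == Decimal('1.1')
#     True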
def _toggle_speedups(enabled):
from . import decoder as dec
from . import encoder as enc
from . import scanner as scan
c_make_encoder = _import_c_make_encoder()
if enabled:
dec.scanstring = dec.c_scanstring or dec.py_scanstring
enc.c_make_encoder = c_make_encoder
enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
enc.py_encode_basestring_ascii)
scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
else:
dec.scanstring = dec.py_scanstring
enc.c_make_encoder = None
enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
scan.make_scanner = scan.py_make_scanner
dec.make_scanner = scan.make_scanner
global _default_decoder
_default_decoder = JSONDecoder(
encoding=None,
object_hook=None,
object_pairs_hook=None,
)
global _default_encoder
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
)
def simple_first(kv):
"""Helper function to pass to item_sort_key to sort simple
elements to the top, then container elements.
"""
return (isinstance(kv[1], (list, dict, tuple)), kv[0])
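# Illustration only (not part of the vendored module): with simple_first as
# the item_sort_key, scalar members are emitted before container members.
#
#     >>> dumps({'outer': {'x': 1}, 'y': 2}, item_sort_key=simple_first)
#     '{"y": 2, "outer": {"x": 1}}'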

View File

@@ -0,0 +1,43 @@
"""Python 3 compatibility shims
"""
import sys
if sys.version_info[0] < 3:
    PY3 = False
    def b(s):
        return s
    def u(s):
        return unicode(s, 'unicode_escape')
    import cStringIO as StringIO
    StringIO = BytesIO = StringIO.StringIO
    text_type = unicode
    binary_type = str
    string_types = (basestring,)
    integer_types = (int, long)
    chr = unichr
    reload_module = reload
    def fromhex(s):
        return s.decode('hex')
else:
PY3 = True
from imp import reload as reload_module
import codecs
def b(s):
return codecs.latin_1_encode(s)[0]
def u(s):
return s
import io
StringIO = io.StringIO
BytesIO = io.BytesIO
text_type = str
binary_type = bytes
string_types = (str,)
integer_types = (int,)
    # the builtin chr() already returns text on Python 3; rebinding it here
    # keeps ``from .compat import chr`` working
    chr = chr
def fromhex(s):
return bytes.fromhex(s)
long_type = integer_types[-1]
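# Illustration only (not part of the vendored module): b() and u() give one
# spelling for byte and text literals on either interpreter; on Python 3:
#
#     >>> b('spam') == 'spam'.encode('latin-1')
#     True
#     >>> u('spam')
#     'spam'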

View File

@@ -0,0 +1,389 @@
"""Implementation of JSONDecoder
"""
import re
import sys
import struct
from .compat import fromhex, b, u, text_type, binary_type, PY3, chr
from .scanner import make_scanner, JSONDecodeError
def _import_c_scanstring():
try:
from ._speedups import scanstring
return scanstring
except ImportError:
return None
c_scanstring = _import_c_scanstring()
# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
_BYTES = fromhex('7FF80000000000007FF0000000000000')
# The struct module in Python 2.4 would get frexp() out of range here
# when an endian is specified in the format string. Fixed in Python 2.5+
if sys.byteorder != 'big':
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
nan, inf = struct.unpack('dd', _BYTES)
return nan, inf, -inf
NaN, PosInf, NegInf = _floatconstants()
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
'NaN': NaN,
}
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
'"': u('"'), '\\': u('\u005c'), '/': u('/'),
'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
}
DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
_PY3=PY3, _maxunicode=sys.maxunicode):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
on attempt to decode an invalid string. If strict is False then literal
control characters are allowed in the string.
Returns a tuple of the decoded string and the index of the character in s
after the end quote."""
if encoding is None:
encoding = DEFAULT_ENCODING
chunks = []
_append = chunks.append
begin = end - 1
while 1:
chunk = _m(s, end)
if chunk is None:
raise JSONDecodeError(
"Unterminated string starting at", s, begin)
end = chunk.end()
content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
if content:
if not _PY3 and not isinstance(content, text_type):
content = text_type(content, encoding)
_append(content)
# Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows
if terminator == '"':
break
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
else:
_append(terminator)
continue
try:
esc = s[end]
except IndexError:
raise JSONDecodeError(
"Unterminated string starting at", s, begin)
# If not a unicode escape sequence, must be in the lookup table
if esc != 'u':
try:
char = _b[esc]
except KeyError:
msg = "Invalid \\X escape sequence %r"
raise JSONDecodeError(msg, s, end)
end += 1
else:
# Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
escX = esc[1:2]
if len(esc) != 4 or escX == 'x' or escX == 'X':
raise JSONDecodeError(msg, s, end - 1)
try:
uni = int(esc, 16)
except ValueError:
raise JSONDecodeError(msg, s, end - 1)
end += 5
# Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
esc2 = s[end + 2:end + 6]
escX = esc2[1:2]
if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
try:
uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end += 6
char = chr(uni)
# Append the unescaped character
_append(char)
return _join(chunks), end
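# Illustration only (not part of the vendored module): ``end`` is the index
# just past the opening quote, and the return value pairs the decoded text
# with the index just past the closing quote.
#
#     >>> py_scanstring('"foo"', 1)
#     ('foo', 5)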
# Use speedup if available
scanstring = c_scanstring or py_scanstring
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
def JSONObject(state, encoding, strict, scan_once, object_hook,
object_pairs_hook, memo=None,
_w=WHITESPACE.match, _ws=WHITESPACE_STR):
(s, end) = state
# Backwards compatibility
if memo is None:
memo = {}
memo_get = memo.setdefault
pairs = []
    # Use a slice to prevent IndexError from being raised; the following
    # check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
# Normally we expect nextchar == '"'
if nextchar != '"':
if nextchar in _ws:
end = _w(s, end).end()
nextchar = s[end:end + 1]
# Trivial empty object
if nextchar == '}':
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end + 1
pairs = {}
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end + 1
elif nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
s, end)
end += 1
while True:
key, end = scanstring(s, end, encoding, strict)
key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
end = _w(s, end).end()
if s[end:end + 1] != ':':
raise JSONDecodeError("Expecting ':' delimiter", s, end)
end += 1
try:
if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end + 1).end()
except IndexError:
pass
value, end = scan_once(s, end)
pairs.append((key, value))
try:
nextchar = s[end]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end]
except IndexError:
nextchar = ''
end += 1
if nextchar == '}':
break
elif nextchar != ',':
raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
try:
nextchar = s[end]
if nextchar in _ws:
end += 1
nextchar = s[end]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end]
except IndexError:
nextchar = ''
end += 1
if nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
s, end - 1)
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
pairs = dict(pairs)
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
(s, end) = state
values = []
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end:end + 1]
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
elif nextchar == '':
raise JSONDecodeError("Expecting value or ']'", s, end)
_append = values.append
while True:
value, end = scan_once(s, end)
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end:end + 1]
end += 1
if nextchar == ']':
break
elif nextchar != ',':
raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
try:
if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end + 1).end()
except IndexError:
pass
return values, end
class JSONDecoder(object):
"""Simple JSON <http://json.org> decoder
Performs the following translations in decoding by default:
+---------------+-------------------+
| JSON | Python |
+===============+===================+
| object | dict |
+---------------+-------------------+
| array | list |
+---------------+-------------------+
| string | unicode |
+---------------+-------------------+
| number (int) | int, long |
+---------------+-------------------+
| number (real) | float |
+---------------+-------------------+
| true | True |
+---------------+-------------------+
| false | False |
+---------------+-------------------+
| null | None |
+---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
"""
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
        Note that currently only encodings that are a superset of ASCII work;
strings of other encodings should be passed in as :class:`unicode`.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with
the result of any object literal decode with an ordered list of pairs.
The return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors; if
        ``False``, control characters will be allowed in strings.
"""
if encoding is None:
encoding = DEFAULT_ENCODING
self.encoding = encoding
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.strict = strict
self.parse_object = JSONObject
self.parse_array = JSONArray
self.parse_string = scanstring
self.memo = {}
self.scan_once = make_scanner(self)
def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
"""Return the Python representation of ``s`` (a ``str`` or ``unicode``
instance containing a JSON document)
"""
if _PY3 and isinstance(s, binary_type):
s = s.decode(self.encoding)
obj, end = self.raw_decode(s)
end = _w(s, end).end()
if end != len(s):
raise JSONDecodeError("Extra data", s, end, len(s))
return obj
def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
"""Decode a JSON document from ``s`` (a ``str`` or ``unicode``
beginning with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
Optionally, ``idx`` can be used to specify an offset in ``s`` where
the JSON document begins.
This can be used to decode a JSON document from a string that may
have extraneous data at the end.
"""
if _PY3 and not isinstance(s, text_type):
raise TypeError("Input string must be text, not bytes")
return self.scan_once(s, idx=_w(s, idx).end())
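# Illustration only (not part of the vendored module): raw_decode() tolerates
# trailing data and reports where the document ended; decode() uses that
# index to reject any extra data.
#
#     >>> JSONDecoder().raw_decode('{"a": 1} trailing')
#     ({'a': 1}, 8)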

View File

@@ -0,0 +1,629 @@
"""Implementation of JSONEncoder
"""
import re
from operator import itemgetter
from decimal import Decimal
from .compat import u, chr, binary_type, string_types, integer_types, PY3
import collections
def _import_speedups():
try:
from . import _speedups
return _speedups.encode_basestring_ascii, _speedups.make_encoder
except ImportError:
return None, None
c_encode_basestring_ascii, c_make_encoder = _import_speedups()
from simplejson.decoder import PosInf
#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax
ESCAPE = re.compile('[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
'\\': '\\\\',
'"': '\\"',
'\b': '\\b',
'\f': '\\f',
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
}
for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
for i in [0x2028, 0x2029]:
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
FLOAT_REPR = repr
def encode_basestring(s, _PY3=PY3, _q=u('"')):
"""Return a JSON representation of a Python string
"""
if _PY3:
if isinstance(s, binary_type):
s = s.decode('utf-8')
else:
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
def replace(match):
return ESCAPE_DCT[match.group(0)]
return _q + ESCAPE.sub(replace, s) + _q
def py_encode_basestring_ascii(s, _PY3=PY3):
"""Return an ASCII-only JSON representation of a Python string
"""
if _PY3:
if isinstance(s, binary_type):
s = s.decode('utf-8')
else:
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
def replace(match):
s = match.group(0)
try:
return ESCAPE_DCT[s]
except KeyError:
n = ord(s)
if n < 0x10000:
#return '\\u{0:04x}'.format(n)
return '\\u%04x' % (n,)
else:
# surrogate pair
n -= 0x10000
s1 = 0xd800 | ((n >> 10) & 0x3ff)
s2 = 0xdc00 | (n & 0x3ff)
#return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
return '\\u%04x\\u%04x' % (s1, s2)
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
encode_basestring_ascii = (
c_encode_basestring_ascii or py_encode_basestring_ascii)
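# Illustration only (not part of the vendored module): non-ASCII characters
# are written as \uXXXX escapes (astral characters become surrogate pairs).
#
#     >>> py_encode_basestring_ascii('\u00e9')
#     '"\\u00e9"'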
class JSONEncoder(object):
"""Extensible JSON <http://json.org> encoder for Python data structures.
Supports the following objects and types by default:
+-------------------+---------------+
| Python | JSON |
+===================+===============+
| dict, namedtuple | object |
+-------------------+---------------+
| list, tuple | array |
+-------------------+---------------+
| str, unicode | string |
+-------------------+---------------+
| int, long, float | number |
+-------------------+---------------+
| True | true |
+-------------------+---------------+
| False | false |
+-------------------+---------------+
| None | null |
+-------------------+---------------+
    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).
"""
item_separator = ', '
key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
item_sort_key=None, for_json=False, ignore_nan=False):
"""Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt
encoding of keys that are not str, int, long, float or None. If
skipkeys is True, such items are simply skipped.
If ensure_ascii is true, the output is guaranteed to be str
objects with all incoming unicode characters escaped. If
        ensure_ascii is false, the output will be a unicode object.
If check_circular is true, then lists, dicts, and custom encoded
objects will be checked for circular references during encoding to
prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place.
If allow_nan is true, then NaN, Infinity, and -Infinity will be
encoded as such. This behavior is not JSON specification compliant,
but is consistent with most JavaScript based encoders and decoders.
Otherwise, it will be a ValueError to encode such floats.
If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure
that JSON serializations can be compared on a day-to-day basis.
If indent is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If specified, separators should be an (item_separator, key_separator)
tuple. The default is (', ', ': ') if *indent* is ``None`` and
(',', ': ') otherwise. To get the most compact JSON representation,
you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
version of the object or raise a ``TypeError``.
If encoding is not None, then all input strings will be
transformed into unicode using that encoding prior to JSON-encoding.
The default is UTF-8.
        If use_decimal is true (the default), ``decimal.Decimal`` will
be supported directly by the encoder. For the inverse, decode JSON
with ``parse_float=decimal.Decimal``.
If namedtuple_as_object is true (the default), objects with
``_asdict()`` methods will be encoded as JSON objects.
If tuple_as_array is true (the default), tuple (and subclasses) will
be encoded as JSON arrays.
If bigint_as_string is true (not the default), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
        rounding that happens in JavaScript otherwise.
If specified, item_sort_key is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key.
If for_json is true (not the default), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
as ``null`` in compliance with the ECMA-262 specification. If true,
this will override *allow_nan*.
"""
self.skipkeys = skipkeys
self.ensure_ascii = ensure_ascii
self.check_circular = check_circular
self.allow_nan = allow_nan
self.sort_keys = sort_keys
self.use_decimal = use_decimal
self.namedtuple_as_object = namedtuple_as_object
self.tuple_as_array = tuple_as_array
self.bigint_as_string = bigint_as_string
self.item_sort_key = item_sort_key
self.for_json = for_json
self.ignore_nan = ignore_nan
if indent is not None and not isinstance(indent, string_types):
indent = indent * ' '
self.indent = indent
if separators is not None:
self.item_separator, self.key_separator = separators
elif indent is not None:
self.item_separator = ','
if default is not None:
self.default = default
self.encoding = encoding
def default(self, o):
"""Implement this method in a subclass such that it returns
a serializable object for ``o``, or calls the base implementation
(to raise a ``TypeError``).
For example, to support arbitrary iterators, you could
implement default like this::
def default(self, o):
try:
iterable = iter(o)
except TypeError:
pass
else:
return list(iterable)
return JSONEncoder.default(self, o)
"""
raise TypeError(repr(o) + " is not JSON serializable")
def encode(self, o):
"""Return a JSON string representation of a Python data structure.
>>> from simplejson import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
"""
# This is for extremely simple cases and benchmarks.
if isinstance(o, binary_type):
_encoding = self.encoding
            if _encoding is not None and _encoding != 'utf-8':
o = o.decode(_encoding)
if isinstance(o, string_types):
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
return encode_basestring(o)
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
        return ''.join(chunks)
def iterencode(self, o, _one_shot=False):
"""Encode the given object and yield each string
representation as available.
For example::
for chunk in JSONEncoder().iterencode(bigobject):
mysocket.write(chunk)
"""
if self.check_circular:
markers = {}
else:
markers = None
if self.ensure_ascii:
_encoder = encode_basestring_ascii
else:
_encoder = encode_basestring
if self.encoding != 'utf-8':
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
if isinstance(o, binary_type):
o = o.decode(_encoding)
return _orig_encoder(o)
def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
_repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on
# the internals.
if o != o:
text = 'NaN'
elif o == _inf:
text = 'Infinity'
elif o == _neginf:
text = '-Infinity'
else:
return _repr(o)
if ignore_nan:
text = 'null'
elif not allow_nan:
raise ValueError(
"Out of range float values are not JSON compliant: " +
repr(o))
return text
key_memo = {}
if (_one_shot and c_make_encoder is not None
and self.indent is None):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
self.bigint_as_string, self.item_sort_key,
self.encoding, self.for_json, self.ignore_nan,
Decimal)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
self.bigint_as_string, self.item_sort_key,
self.encoding, self.for_json,
Decimal=Decimal)
try:
return _iterencode(o, 0)
finally:
key_memo.clear()
class JSONEncoderForHTML(JSONEncoder):
"""An encoder that produces JSON safe to embed in HTML.
To embed JSON content in, say, a script tag on a web page, the
characters &, < and > should be escaped. They cannot be escaped
with the usual entities (e.g. &amp;) because they are not expanded
within <script> tags.
"""
def encode(self, o):
# Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated.
chunks = self.iterencode(o, True)
        return ''.join(chunks)
def iterencode(self, o, _one_shot=False):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
for chunk in chunks:
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
chunk = chunk.replace('>', '\\u003e')
yield chunk
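# Illustration only (not part of the vendored module): the HTML-unsafe
# characters survive normal JSON encoding but are rewritten to unicode
# escapes, so the result can be embedded in a <script> tag.
#
#     >>> JSONEncoderForHTML().encode('</script>')
#     '"\\u003c/script\\u003e"'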
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_bigint_as_string, _item_sort_key, _encoding, _for_json,
## HACK: hand-optimized bytecode; turn globals into locals
_PY3=PY3,
ValueError=ValueError,
string_types=string_types,
Decimal=Decimal,
dict=dict,
float=float,
id=id,
integer_types=integer_types,
isinstance=isinstance,
list=list,
str=str,
tuple=tuple,
):
if _item_sort_key and not isinstance(_item_sort_key, collections.Callable):
raise TypeError("item_sort_key must be None or callable")
elif _sort_keys and not _item_sort_key:
_item_sort_key = itemgetter(0)
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
return
if markers is not None:
markerid = id(lst)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = lst
buf = '['
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + (_indent * _current_indent_level)
separator = _item_separator + newline_indent
buf += newline_indent
else:
newline_indent = None
separator = _item_separator
first = True
for value in lst:
if first:
first = False
else:
buf = separator
if (isinstance(value, string_types) or
(_PY3 and isinstance(value, binary_type))):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, integer_types):
yield ((buf + str(value))
if (not _bigint_as_string or
(-1 << 53) < value < (1 << 53))
else (buf + '"' + str(value) + '"'))
elif isinstance(value, float):
yield buf + _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield buf + str(value)
else:
yield buf
for_json = _for_json and getattr(value, 'for_json', None)
if for_json and isinstance(for_json, collections.Callable):
chunks = _iterencode(for_json(), _current_indent_level)
elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
else:
_asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
if _asdict and isinstance(_asdict, collections.Callable):
chunks = _iterencode_dict(_asdict(),
_current_indent_level)
elif _tuple_as_array and isinstance(value, tuple):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + (_indent * _current_indent_level)
yield ']'
if markers is not None:
del markers[markerid]
def _stringify_key(key):
if isinstance(key, string_types): # pragma: no cover
pass
elif isinstance(key, binary_type):
key = key.decode(_encoding)
elif isinstance(key, float):
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, integer_types):
key = str(key)
elif _use_decimal and isinstance(key, Decimal):
key = str(key)
elif _skipkeys:
key = None
else:
raise TypeError("key " + repr(key) + " is not a string")
return key
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
return
if markers is not None:
markerid = id(dct)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + (_indent * _current_indent_level)
item_separator = _item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
item_separator = _item_separator
first = True
        # items() is iterable on both Python 2 and 3; iter() keeps the lazy
        # iteration that the old iteritems() spelling provided
        iteritems = iter(dct.items())
        if _item_sort_key:
            items = []
            for k, v in dct.items():
if not isinstance(k, string_types):
k = _stringify_key(k)
if k is None:
continue
items.append((k, v))
items.sort(key=_item_sort_key)
else:
items = iteritems
for key, value in items:
if not (_item_sort_key or isinstance(key, string_types)):
key = _stringify_key(key)
if key is None:
# _skipkeys must be True
continue
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
if (isinstance(value, string_types) or
(_PY3 and isinstance(value, binary_type))):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, integer_types):
yield (str(value)
if (not _bigint_as_string or
(-1 << 53) < value < (1 << 53))
else ('"' + str(value) + '"'))
elif isinstance(value, float):
yield _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield str(value)
else:
for_json = _for_json and getattr(value, 'for_json', None)
if for_json and isinstance(for_json, collections.Callable):
chunks = _iterencode(for_json(), _current_indent_level)
elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
else:
_asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
if _asdict and isinstance(_asdict, collections.Callable):
chunks = _iterencode_dict(_asdict(),
_current_indent_level)
elif _tuple_as_array and isinstance(value, tuple):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + (_indent * _current_indent_level)
yield '}'
if markers is not None:
del markers[markerid]
def _iterencode(o, _current_indent_level):
if (isinstance(o, string_types) or
(_PY3 and isinstance(o, binary_type))):
yield _encoder(o)
elif o is None:
yield 'null'
elif o is True:
yield 'true'
elif o is False:
yield 'false'
elif isinstance(o, integer_types):
yield (str(o)
if (not _bigint_as_string or
(-1 << 53) < o < (1 << 53))
else ('"' + str(o) + '"'))
elif isinstance(o, float):
yield _floatstr(o)
else:
for_json = _for_json and getattr(o, 'for_json', None)
if for_json and isinstance(for_json, collections.Callable):
for chunk in _iterencode(for_json(), _current_indent_level):
yield chunk
elif isinstance(o, list):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
else:
_asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
if _asdict and isinstance(_asdict, collections.Callable):
for chunk in _iterencode_dict(_asdict(),
_current_indent_level):
yield chunk
elif (_tuple_as_array and isinstance(o, tuple)):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
elif isinstance(o, dict):
for chunk in _iterencode_dict(o, _current_indent_level):
yield chunk
elif _use_decimal and isinstance(o, Decimal):
yield str(o)
else:
if markers is not None:
markerid = id(o)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
if markers is not None:
del markers[markerid]
return _iterencode

View File

@@ -0,0 +1,119 @@
"""Drop-in replacement for collections.OrderedDict by Raymond Hettinger
http://code.activestate.com/recipes/576693/
"""
from UserDict import DictMixin
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
try:
all
except NameError:
def all(seq):
for elem in seq:
if not elem:
return False
return True
class OrderedDict(dict, DictMixin):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__end
except AttributeError:
self.clear()
self.update(*args, **kwds)
def clear(self):
self.__end = end = []
end += [None, end, end] # sentinel node for doubly linked list
self.__map = {} # key --> [key, prev, next]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
end = self.__end
curr = end[1]
curr[2] = end[1] = self.__map[key] = [key, curr, end]
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
key, prev, next = self.__map.pop(key)
prev[2] = next
next[1] = prev
def __iter__(self):
end = self.__end
curr = end[2]
while curr is not end:
yield curr[0]
curr = curr[2]
def __reversed__(self):
end = self.__end
curr = end[1]
while curr is not end:
yield curr[0]
curr = curr[1]
def popitem(self, last=True):
if not self:
raise KeyError('dictionary is empty')
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
if last:
key = next(reversed(self))
else:
key = next(iter(self))
value = self.pop(key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
tmp = self.__map, self.__end
del self.__map, self.__end
inst_dict = vars(self).copy()
self.__map, self.__end = tmp
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def keys(self):
return list(self)
setdefault = DictMixin.setdefault
update = DictMixin.update
pop = DictMixin.pop
values = DictMixin.values
items = DictMixin.items
iterkeys = DictMixin.iterkeys
itervalues = DictMixin.itervalues
iteritems = DictMixin.iteritems
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, list(self.items()))
def copy(self):
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
if isinstance(other, OrderedDict):
            return len(self) == len(other) and \
                all(p == q for p, q in zip(self.items(), other.items()))
return dict.__eq__(self, other)
def __ne__(self, other):
return not self == other
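# Illustration only (not part of the vendored module): insertion order is
# preserved, unlike the plain dict on the old interpreters this fallback
# targets.
#
#     >>> OrderedDict([('b', 1), ('a', 2)]).keys()
#     ['b', 'a']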

View File

@@ -0,0 +1,125 @@
"""JSON token scanner
"""
import re
def _import_c_make_scanner():
try:
from simplejson._speedups import make_scanner
return make_scanner
except ImportError:
return None
c_make_scanner = _import_c_make_scanner()
__all__ = ['make_scanner', 'JSONDecodeError']
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
class JSONDecodeError(ValueError):
"""Subclass of ValueError with the following additional properties:
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
end: The end index of doc where parsing failed (may be None)
lineno: The line corresponding to pos
colno: The column corresponding to pos
endlineno: The line corresponding to end (may be None)
endcolno: The column corresponding to end (may be None)
"""
# Note that this exception is used from _speedups
def __init__(self, msg, doc, pos, end=None):
ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
self.msg = msg
self.doc = doc
self.pos = pos
self.end = end
self.lineno, self.colno = linecol(doc, pos)
if end is not None:
self.endlineno, self.endcolno = linecol(doc, end)
else:
self.endlineno, self.endcolno = None, None
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
msg = msg.replace('%r', repr(doc[pos:pos + 1]))
if end is None:
fmt = '%s: line %d column %d (char %d)'
return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
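# Illustration only (not part of the vendored module): linecol() and errmsg()
# turn a character offset into the one-based line/column coordinates shown
# in error messages.
#
#     >>> errmsg('Expecting value', '[,]', 1)
#     'Expecting value: line 1 column 2 (char 1)'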
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
parse_string = context.parse_string
match_number = NUMBER_RE.match
encoding = context.encoding
strict = context.strict
parse_float = context.parse_float
parse_int = context.parse_int
parse_constant = context.parse_constant
object_hook = context.object_hook
object_pairs_hook = context.object_pairs_hook
memo = context.memo
def _scan_once(string, idx):
errmsg = 'Expecting value'
try:
nextchar = string[idx]
except IndexError:
raise JSONDecodeError(errmsg, string, idx)
if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), encoding, strict,
_scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
return None, idx + 4
elif nextchar == 't' and string[idx:idx + 4] == 'true':
return True, idx + 4
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
return False, idx + 5
m = match_number(string, idx)
if m is not None:
integer, frac, exp = m.groups()
if frac or exp:
res = parse_float(integer + (frac or '') + (exp or ''))
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise JSONDecodeError(errmsg, string, idx)
def scan_once(string, idx):
try:
return _scan_once(string, idx)
finally:
memo.clear()
return scan_once
make_scanner = c_make_scanner or py_make_scanner
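# Illustration only (not part of the vendored module): scan_once consumes
# exactly one JSON value starting at idx and returns it with the end index,
# clearing the key memo afterwards.
#
#     >>> from simplejson.decoder import JSONDecoder
#     >>> JSONDecoder().scan_once('[1, 2]', 0)
#     ([1, 2], 6)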

View File

@@ -0,0 +1,79 @@
import unittest
import doctest
import sys
class OptionalExtensionTestSuite(unittest.TestSuite):
def run(self, result):
import simplejson
run = unittest.TestSuite.run
run(self, result)
if simplejson._import_c_make_encoder() is None:
TestMissingSpeedups().run(result)
else:
simplejson._toggle_speedups(False)
run(self, result)
simplejson._toggle_speedups(True)
return result
class TestMissingSpeedups(unittest.TestCase):
def runTest(self):
if hasattr(sys, 'pypy_translation_info'):
"PyPy doesn't need speedups! :)"
elif hasattr(self, 'skipTest'):
self.skipTest('_speedups.so is missing!')
def additional_tests(suite=None):
import simplejson
import simplejson.encoder
import simplejson.decoder
if suite is None:
suite = unittest.TestSuite()
for mod in (simplejson, simplejson.encoder, simplejson.decoder):
suite.addTest(doctest.DocTestSuite(mod))
suite.addTest(doctest.DocFileSuite('../../index.rst'))
return suite
def all_tests_suite():
suite = unittest.TestLoader().loadTestsFromNames([
'simplejson.tests.test_bigint_as_string',
'simplejson.tests.test_check_circular',
'simplejson.tests.test_decode',
'simplejson.tests.test_default',
'simplejson.tests.test_dump',
'simplejson.tests.test_encode_basestring_ascii',
'simplejson.tests.test_encode_for_html',
'simplejson.tests.test_errors',
'simplejson.tests.test_fail',
'simplejson.tests.test_float',
'simplejson.tests.test_indent',
'simplejson.tests.test_pass1',
'simplejson.tests.test_pass2',
'simplejson.tests.test_pass3',
'simplejson.tests.test_recursion',
'simplejson.tests.test_scanstring',
'simplejson.tests.test_separators',
'simplejson.tests.test_speedups',
'simplejson.tests.test_unicode',
'simplejson.tests.test_decimal',
'simplejson.tests.test_tuple',
'simplejson.tests.test_namedtuple',
'simplejson.tests.test_tool',
'simplejson.tests.test_for_json',
])
suite = additional_tests(suite)
return OptionalExtensionTestSuite([suite])
def main():
runner = unittest.TextTestRunner(verbosity=1 + sys.argv.count('-v'))
suite = all_tests_suite()
raise SystemExit(not runner.run(suite).wasSuccessful())
if __name__ == '__main__':
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
main()

View File

@@ -0,0 +1,58 @@
from unittest import TestCase
import simplejson as json
from simplejson.compat import long_type
class TestBigintAsString(TestCase):
# Python 2.5, at least the one that ships on Mac OS X, calculates
# 2 ** 53 as 0! It manages to calculate 1 << 53 correctly.
values = [(200, 200),
((1 << 53) - 1, 9007199254740991),
((1 << 53), '9007199254740992'),
((1 << 53) + 1, '9007199254740993'),
(-100, -100),
((-1 << 53), '-9007199254740992'),
((-1 << 53) - 1, '-9007199254740993'),
((-1 << 53) + 1, -9007199254740991)]
def test_ints(self):
for val, expect in self.values:
self.assertEqual(
val,
json.loads(json.dumps(val)))
self.assertEqual(
expect,
json.loads(json.dumps(val, bigint_as_string=True)))
def test_lists(self):
for val, expect in self.values:
val = [val, val]
expect = [expect, expect]
self.assertEqual(
val,
json.loads(json.dumps(val)))
self.assertEqual(
expect,
json.loads(json.dumps(val, bigint_as_string=True)))
def test_dicts(self):
for val, expect in self.values:
val = {'k': val}
expect = {'k': expect}
self.assertEqual(
val,
json.loads(json.dumps(val)))
self.assertEqual(
expect,
json.loads(json.dumps(val, bigint_as_string=True)))
def test_dict_keys(self):
for val, _ in self.values:
expect = {str(val): 'value'}
val = {val: 'value'}
self.assertEqual(
expect,
json.loads(json.dumps(val)))
self.assertEqual(
expect,
json.loads(json.dumps(val, bigint_as_string=True)))
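# Illustration only (not part of the test suite): 2**53 is the first integer
# JavaScript cannot represent exactly, so it is the boundary where
# bigint_as_string starts quoting.
#
#     >>> import simplejson as json
#     >>> json.dumps([2 ** 53 - 1, 2 ** 53], bigint_as_string=True)
#     '[9007199254740991, "9007199254740992"]'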

View File

@@ -0,0 +1,30 @@
from unittest import TestCase
import simplejson as json
def default_iterable(obj):
return list(obj)
class TestCheckCircular(TestCase):
def test_circular_dict(self):
dct = {}
dct['a'] = dct
self.assertRaises(ValueError, json.dumps, dct)
def test_circular_list(self):
lst = []
lst.append(lst)
self.assertRaises(ValueError, json.dumps, lst)
def test_circular_composite(self):
dct2 = {}
dct2['a'] = []
dct2['a'].append(dct2)
self.assertRaises(ValueError, json.dumps, dct2)
def test_circular_default(self):
json.dumps([set()], default=default_iterable)
self.assertRaises(TypeError, json.dumps, [set()])
def test_circular_off_default(self):
json.dumps([set()], default=default_iterable, check_circular=False)
self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)

View File

@@ -0,0 +1,71 @@
import decimal
from decimal import Decimal
from unittest import TestCase
from simplejson.compat import StringIO, reload_module
import simplejson as json
class TestDecimal(TestCase):
NUMS = "1.0", "10.00", "1.1", "1234567890.1234567890", "500"
def dumps(self, obj, **kw):
sio = StringIO()
json.dump(obj, sio, **kw)
res = json.dumps(obj, **kw)
self.assertEqual(res, sio.getvalue())
return res
def loads(self, s, **kw):
sio = StringIO(s)
res = json.loads(s, **kw)
self.assertEqual(res, json.load(sio, **kw))
return res
def test_decimal_encode(self):
for d in map(Decimal, self.NUMS):
self.assertEqual(self.dumps(d, use_decimal=True), str(d))
def test_decimal_decode(self):
for s in self.NUMS:
self.assertEqual(self.loads(s, parse_float=Decimal), Decimal(s))
def test_stringify_key(self):
for d in map(Decimal, self.NUMS):
v = {d: d}
self.assertEqual(
self.loads(
self.dumps(v, use_decimal=True), parse_float=Decimal),
{str(d): d})
def test_decimal_roundtrip(self):
for d in map(Decimal, self.NUMS):
# The type might not be the same (int and Decimal) but they
# should still compare equal.
for v in [d, [d], {'': d}]:
self.assertEqual(
self.loads(
self.dumps(v, use_decimal=True), parse_float=Decimal),
v)
def test_decimal_defaults(self):
d = Decimal('1.1')
# use_decimal=True is the default
self.assertRaises(TypeError, json.dumps, d, use_decimal=False)
self.assertEqual('1.1', json.dumps(d))
self.assertEqual('1.1', json.dumps(d, use_decimal=True))
self.assertRaises(TypeError, json.dump, d, StringIO(),
use_decimal=False)
sio = StringIO()
json.dump(d, sio)
self.assertEqual('1.1', sio.getvalue())
sio = StringIO()
json.dump(d, sio, use_decimal=True)
self.assertEqual('1.1', sio.getvalue())
def test_decimal_reload(self):
# Simulate a subinterpreter that reloads the Python modules but not
# the C code https://github.com/simplejson/simplejson/issues/34
global Decimal
Decimal = reload_module(decimal).Decimal
import simplejson.encoder
simplejson.encoder.Decimal = Decimal
self.test_decimal_roundtrip()
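
A short sketch of the lossless round-trip exercised above, assuming the vendored simplejson: use_decimal=True emits Decimal values digit-for-digit, and parse_float=Decimal recovers them exactly.

from decimal import Decimal
import simplejson as json

d = Decimal('1234567890.1234567890')
s = json.dumps(d, use_decimal=True)  # '1234567890.1234567890', no float rounding
assert json.loads(s, parse_float=Decimal) == d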

View File

@ -0,0 +1,88 @@
import decimal
from unittest import TestCase
import simplejson as json
from simplejson.compat import StringIO
from simplejson import OrderedDict
class TestDecode(TestCase):
if not hasattr(TestCase, 'assertIs'):
def assertIs(self, a, b):
self.assertTrue(a is b, '%r is %r' % (a, b))
def test_decimal(self):
rval = json.loads('1.1', parse_float=decimal.Decimal)
self.assertTrue(isinstance(rval, decimal.Decimal))
self.assertEqual(rval, decimal.Decimal('1.1'))
def test_float(self):
rval = json.loads('1', parse_int=float)
self.assertTrue(isinstance(rval, float))
self.assertEqual(rval, 1.0)
def test_decoder_optimizations(self):
# Several optimizations were made that skip over calls to
# the whitespace regex, so this test is designed to try and
# exercise the uncommon cases. The array cases are already covered.
rval = json.loads('{ "key" : "value" , "k":"v" }')
self.assertEqual(rval, {"key":"value", "k":"v"})
def test_empty_objects(self):
s = '{}'
self.assertEqual(json.loads(s), eval(s))
s = '[]'
self.assertEqual(json.loads(s), eval(s))
s = '""'
self.assertEqual(json.loads(s), eval(s))
def test_object_pairs_hook(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
("qrt", 5), ("pad", 6), ("hoy", 7)]
self.assertEqual(json.loads(s), eval(s))
self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p)
self.assertEqual(json.load(StringIO(s),
object_pairs_hook=lambda x: x), p)
od = json.loads(s, object_pairs_hook=OrderedDict)
self.assertEqual(od, OrderedDict(p))
self.assertEqual(type(od), OrderedDict)
# the object_pairs_hook takes priority over the object_hook
self.assertEqual(json.loads(s,
object_pairs_hook=OrderedDict,
object_hook=lambda x: None),
OrderedDict(p))
def check_keys_reuse(self, source, loads):
rval = loads(source)
(a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
self.assertIs(a, c)
self.assertIs(b, d)
def test_keys_reuse_str(self):
s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'.encode('utf8')
self.check_keys_reuse(s, json.loads)
def test_keys_reuse_unicode(self):
s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
self.check_keys_reuse(s, json.loads)
def test_empty_strings(self):
self.assertEqual(json.loads('""'), "")
self.assertEqual(json.loads('""'), "")
self.assertEqual(json.loads('[""]'), [""])
self.assertEqual(json.loads('[""]'), [""])
def test_raw_decode(self):
cls = json.decoder.JSONDecoder
self.assertEqual(
({'a': {}}, 9),
cls().raw_decode("{\"a\": {}}"))
# http://code.google.com/p/simplejson/issues/detail?id=85
self.assertEqual(
({'a': {}}, 9),
cls(object_pairs_hook=dict).raw_decode("{\"a\": {}}"))
# https://github.com/simplejson/simplejson/pull/38
self.assertEqual(
({'a': {}}, 11),
cls().raw_decode(" \n{\"a\": {}}"))
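
Restating the hook contract asserted above as a sketch (same simplejson assumption): object_pairs_hook receives the (key, value) pairs in document order, which is also how OrderedDict ends up preserving key order.

import simplejson as json

s = '{"b": 1, "a": 2}'
print(json.loads(s, object_pairs_hook=lambda pairs: pairs))  # [('b', 1), ('a', 2)]
od = json.loads(s, object_pairs_hook=json.OrderedDict)
print(list(od.keys()))  # ['b', 'a'], document order rather than sorted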

View File

@ -0,0 +1,9 @@
from unittest import TestCase
import simplejson as json
class TestDefault(TestCase):
def test_default(self):
self.assertEqual(
json.dumps(type, default=repr),
json.dumps(repr(type)))

View File

@ -0,0 +1,121 @@
from unittest import TestCase
from simplejson.compat import StringIO, long_type, b, binary_type, PY3
import simplejson as json
def as_text_type(s):
if PY3 and isinstance(s, binary_type):
return s.decode('ascii')
return s
class TestDump(TestCase):
def test_dump(self):
sio = StringIO()
json.dump({}, sio)
self.assertEqual(sio.getvalue(), '{}')
def test_constants(self):
for c in [None, True, False]:
self.assertTrue(json.loads(json.dumps(c)) is c)
self.assertTrue(json.loads(json.dumps([c]))[0] is c)
self.assertTrue(json.loads(json.dumps({'a': c}))['a'] is c)
def test_stringify_key(self):
items = [(b('bytes'), 'bytes'),
(1.0, '1.0'),
(10, '10'),
(True, 'true'),
(False, 'false'),
(None, 'null'),
(long_type(100), '100')]
for k, expect in items:
self.assertEqual(
json.loads(json.dumps({k: expect})),
{expect: expect})
self.assertEqual(
json.loads(json.dumps({k: expect}, sort_keys=True)),
{expect: expect})
self.assertRaises(TypeError, json.dumps, {json: 1})
for v in [{}, {'other': 1}, {b('derp'): 1, 'herp': 2}]:
for sort_keys in [False, True]:
v0 = dict(v)
v0[json] = 1
v1 = dict((as_text_type(key), val) for (key, val) in list(v.items()))
self.assertEqual(
json.loads(json.dumps(v0, skipkeys=True, sort_keys=sort_keys)),
v1)
self.assertEqual(
json.loads(json.dumps({'': v0}, skipkeys=True, sort_keys=sort_keys)),
{'': v1})
self.assertEqual(
json.loads(json.dumps([v0], skipkeys=True, sort_keys=sort_keys)),
[v1])
def test_dumps(self):
self.assertEqual(json.dumps({}), '{}')
def test_encode_truefalse(self):
self.assertEqual(json.dumps(
{True: False, False: True}, sort_keys=True),
'{"false": true, "true": false}')
self.assertEqual(
json.dumps(
{2: 3.0,
4.0: long_type(5),
False: 1,
long_type(6): True,
"7": 0},
sort_keys=True),
'{"2": 3.0, "4.0": 5, "6": true, "7": 0, "false": 1}')
def test_ordered_dict(self):
# http://bugs.python.org/issue6105
items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)]
s = json.dumps(json.OrderedDict(items))
self.assertEqual(
s,
'{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}')
def test_indent_unknown_type_acceptance(self):
"""
A test against the regression mentioned at `github issue 29`_.
The indent parameter should accept any type which pretends to be
an instance of int or long when it comes to being multiplied by
strings, even if it is not actually an int or long, for
backwards compatibility.
.. _github issue 29:
http://github.com/simplejson/simplejson/issue/29
"""
class AwesomeInt(object):
"""An awesome reimplementation of integers"""
def __init__(self, *args, **kwargs):
if len(args) > 0:
# [construct from literals, objects, etc.]
# ...
# Finally, if args[0] is an integer, store it
if isinstance(args[0], int):
self._int = args[0]
# [various methods]
def __mul__(self, other):
# [various ways to multiply AwesomeInt objects]
# ... finally, if the right-hand operand is not awesome enough,
# try to do a normal integer multiplication
if hasattr(self, '_int'):
return self._int * other
else:
raise NotImplementedError("To do non-awesome things with"
" this object, please construct it from an integer!")
s = json.dumps([0, 1, 2], indent=AwesomeInt(3))
self.assertEqual(s, '[\n 0,\n 1,\n 2\n]')
def test_accumulator(self):
# the C API uses an accumulator that collects after 100,000 appends
lst = [0] * 100000
self.assertEqual(json.loads(json.dumps(lst)), lst)

View File

@ -0,0 +1,47 @@
from unittest import TestCase
import simplejson.encoder
from simplejson.compat import b
CASES = [
('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
('controls', '"controls"'),
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
(' s p a c e d ', '" s p a c e d "'),
('\U0001d120', '"\\ud834\\udd20"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
(b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
(b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
]
class TestEncodeBaseStringAscii(TestCase):
def test_py_encode_basestring_ascii(self):
self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii)
def test_c_encode_basestring_ascii(self):
if not simplejson.encoder.c_encode_basestring_ascii:
return
self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii)
def _test_encode_basestring_ascii(self, encode_basestring_ascii):
fname = encode_basestring_ascii.__name__
for input_string, expect in CASES:
result = encode_basestring_ascii(input_string)
#self.assertEqual(result, expect,
# '{0!r} != {1!r} for {2}({3!r})'.format(
# result, expect, fname, input_string))
self.assertEqual(result, expect,
'%r != %r for %s(%r)' % (result, expect, fname, input_string))
def test_sorted_dict(self):
items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)]
s = simplejson.dumps(dict(items), sort_keys=True)
self.assertEqual(s, '{"five": 5, "four": 4, "one": 1, "three": 3, "two": 2}')

View File

@ -0,0 +1,30 @@
import unittest
import simplejson as json
class TestEncodeForHTML(unittest.TestCase):
def setUp(self):
self.decoder = json.JSONDecoder()
self.encoder = json.JSONEncoderForHTML()
def test_basic_encode(self):
self.assertEqual(r'"\u0026"', self.encoder.encode('&'))
self.assertEqual(r'"\u003c"', self.encoder.encode('<'))
self.assertEqual(r'"\u003e"', self.encoder.encode('>'))
def test_basic_roundtrip(self):
for char in '&<>':
self.assertEqual(
char, self.decoder.decode(
self.encoder.encode(char)))
def test_prevent_script_breakout(self):
bad_string = '</script><script>alert("gotcha")</script>'
self.assertEqual(
r'"\u003c/script\u003e\u003cscript\u003e'
r'alert(\"gotcha\")\u003c/script\u003e"',
self.encoder.encode(bad_string))
self.assertEqual(
bad_string, self.decoder.decode(
self.encoder.encode(bad_string)))
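
The point of JSONEncoderForHTML, sketched under the same assumptions as the tests above: escaping &, < and > as \uXXXX keeps a JSON payload from terminating an enclosing <script> block, and decoding still round-trips.

import simplejson as json

payload = '</script><script>alert("gotcha")</script>'
safe = json.dumps(payload, cls=json.JSONEncoderForHTML)
assert '</script>' not in safe      # '<' and '>' are emitted as \u003c and \u003e
assert json.loads(safe) == payload  # round-trips to the original string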

View File

@ -0,0 +1,35 @@
import sys
from unittest import TestCase
import simplejson as json
from simplejson.compat import u, b
class TestErrors(TestCase):
def test_string_keys_error(self):
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0, ('d',): 'D tuple'}]
self.assertRaises(TypeError, json.dumps, data)
def test_decode_error(self):
err = None
try:
json.loads('{}\na\nb')
except json.JSONDecodeError:
err = sys.exc_info()[1]
else:
self.fail('Expected JSONDecodeError')
self.assertEqual(err.lineno, 2)
self.assertEqual(err.colno, 1)
self.assertEqual(err.endlineno, 3)
self.assertEqual(err.endcolno, 2)
def test_scan_error(self):
err = None
for t in (u, b):
try:
json.loads(t('{"asdf": "'))
except json.JSONDecodeError:
err = sys.exc_info()[1]
else:
self.fail('Expected JSONDecodeError')
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, 10)

View File

@ -0,0 +1,176 @@
import sys
from unittest import TestCase
import simplejson as json
# 2007-10-05
JSONDOCS = [
# http://json.org/JSON_checker/test/fail1.json
'"A JSON payload should be an object or array, not a string."',
# http://json.org/JSON_checker/test/fail2.json
'["Unclosed array"',
# http://json.org/JSON_checker/test/fail3.json
'{unquoted_key: "keys must be quoted"}',
# http://json.org/JSON_checker/test/fail4.json
'["extra comma",]',
# http://json.org/JSON_checker/test/fail5.json
'["double extra comma",,]',
# http://json.org/JSON_checker/test/fail6.json
'[ , "<-- missing value"]',
# http://json.org/JSON_checker/test/fail7.json
'["Comma after the close"],',
# http://json.org/JSON_checker/test/fail8.json
'["Extra close"]]',
# http://json.org/JSON_checker/test/fail9.json
'{"Extra comma": true,}',
# http://json.org/JSON_checker/test/fail10.json
'{"Extra value after close": true} "misplaced quoted value"',
# http://json.org/JSON_checker/test/fail11.json
'{"Illegal expression": 1 + 2}',
# http://json.org/JSON_checker/test/fail12.json
'{"Illegal invocation": alert()}',
# http://json.org/JSON_checker/test/fail13.json
'{"Numbers cannot have leading zeroes": 013}',
# http://json.org/JSON_checker/test/fail14.json
'{"Numbers cannot be hex": 0x14}',
# http://json.org/JSON_checker/test/fail15.json
'["Illegal backslash escape: \\x15"]',
# http://json.org/JSON_checker/test/fail16.json
'[\\naked]',
# http://json.org/JSON_checker/test/fail17.json
'["Illegal backslash escape: \\017"]',
# http://json.org/JSON_checker/test/fail18.json
'[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
# http://json.org/JSON_checker/test/fail19.json
'{"Missing colon" null}',
# http://json.org/JSON_checker/test/fail20.json
'{"Double colon":: null}',
# http://json.org/JSON_checker/test/fail21.json
'{"Comma instead of colon", null}',
# http://json.org/JSON_checker/test/fail22.json
'["Colon instead of comma": false]',
# http://json.org/JSON_checker/test/fail23.json
'["Bad value", truth]',
# http://json.org/JSON_checker/test/fail24.json
"['single quote']",
# http://json.org/JSON_checker/test/fail25.json
'["\ttab\tcharacter\tin\tstring\t"]',
# http://json.org/JSON_checker/test/fail26.json
'["tab\\ character\\ in\\ string\\ "]',
# http://json.org/JSON_checker/test/fail27.json
'["line\nbreak"]',
# http://json.org/JSON_checker/test/fail28.json
'["line\\\nbreak"]',
# http://json.org/JSON_checker/test/fail29.json
'[0e]',
# http://json.org/JSON_checker/test/fail30.json
'[0e+]',
# http://json.org/JSON_checker/test/fail31.json
'[0e+-1]',
# http://json.org/JSON_checker/test/fail32.json
'{"Comma instead if closing brace": true,',
# http://json.org/JSON_checker/test/fail33.json
'["mismatch"}',
# http://code.google.com/p/simplejson/issues/detail?id=3
'["A\u001FZ control characters in string"]',
# misc based on coverage
'{',
'{]',
'{"foo": "bar"]',
'{"foo": "bar"',
'nul',
'nulx',
'-',
'-x',
'-e',
'-e0',
'-Infinite',
'-Inf',
'Infinit',
'Infinite',
'NaM',
'NuN',
'falsy',
'fal',
'trug',
'tru',
'1e',
'1ex',
'1e-',
'1e-x',
]
SKIPS = {
1: "why not have a string payload?",
18: "spec doesn't specify any nesting limitations",
}
class TestFail(TestCase):
def test_failures(self):
for idx, doc in enumerate(JSONDOCS):
idx = idx + 1
if idx in SKIPS:
json.loads(doc)
continue
try:
json.loads(doc)
except json.JSONDecodeError:
pass
else:
self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
def test_array_decoder_issue46(self):
# http://code.google.com/p/simplejson/issues/detail?id=46
for doc in ['[,]', '[,]']:
try:
json.loads(doc)
except json.JSONDecodeError:
e = sys.exc_info()[1]
self.assertEqual(e.pos, 1)
self.assertEqual(e.lineno, 1)
self.assertEqual(e.colno, 2)
except Exception:
e = sys.exc_info()[1]
self.fail("Unexpected exception raised %r %s" % (e, e))
else:
self.fail("Unexpected success parsing '[,]'")
def test_truncated_input(self):
test_cases = [
('', 'Expecting value', 0),
('[', "Expecting value or ']'", 1),
('[42', "Expecting ',' delimiter", 3),
('[42,', 'Expecting value', 4),
('["', 'Unterminated string starting at', 1),
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
('{"spam":', 'Expecting value', 8),
('{"spam":42', "Expecting ',' delimiter", 10),
('{"spam":42,', 'Expecting property name enclosed in double quotes',
11),
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1),
]
for data, msg, idx in test_cases:
try:
json.loads(data)
except json.JSONDecodeError:
e = sys.exc_info()[1]
self.assertEqual(
e.msg[:len(msg)],
msg,
"%r doesn't start with %r for %r" % (e.msg, msg, data))
self.assertEqual(
e.pos, idx,
"pos %r != %r for %r" % (e.pos, idx, data))
except Exception:
e = sys.exc_info()[1]
self.fail("Unexpected exception raised %r %s" % (e, e))
else:
self.fail("Unexpected success parsing '%r'" % (data,))
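
As the table above shows, simplejson's JSONDecodeError carries position details (msg, pos, lineno, colno). A minimal sketch of reading them; the tests use sys.exc_info()[1] only for Python 2.5 compatibility:

import simplejson as json

try:
    json.loads('{"spam":42,')
except json.JSONDecodeError as e:
    print(e.msg)  # Expecting property name enclosed in double quotes
    print(e.pos)  # 11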

View File

@ -0,0 +1,35 @@
import math
from unittest import TestCase
from simplejson.compat import long_type, text_type
import simplejson as json
from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase):
def test_degenerates_allow(self):
for inf in (PosInf, NegInf):
self.assertEqual(json.loads(json.dumps(inf)), inf)
# Python 2.5 doesn't have math.isnan
nan = json.loads(json.dumps(NaN))
self.assertTrue((0 + nan) != nan)
def test_degenerates_ignore(self):
for f in (PosInf, NegInf, NaN):
self.assertEqual(json.loads(json.dumps(f, ignore_nan=True)), None)
def test_degenerates_deny(self):
for f in (PosInf, NegInf, NaN):
self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100,
math.pi**-100, 3.1]:
self.assertEqual(float(json.dumps(num)), num)
self.assertEqual(json.loads(json.dumps(num)), num)
self.assertEqual(json.loads(text_type(json.dumps(num))), num)
def test_ints(self):
for num in [1, long_type(1), 1<<32, 1<<64]:
self.assertEqual(json.dumps(num), str(num))
self.assertEqual(int(json.dumps(num)), num)
self.assertEqual(json.loads(json.dumps(num)), num)
self.assertEqual(json.loads(text_type(json.dumps(num))), num)

View File

@ -0,0 +1,97 @@
import unittest
import simplejson as json
class ForJson(object):
def for_json(self):
return {'for_json': 1}
class NestedForJson(object):
def for_json(self):
return {'nested': ForJson()}
class ForJsonList(object):
def for_json(self):
return ['list']
class DictForJson(dict):
def for_json(self):
return {'alpha': 1}
class ListForJson(list):
def for_json(self):
return ['list']
class TestForJson(unittest.TestCase):
def assertRoundTrip(self, obj, other, for_json=True):
if for_json is None:
# None will use the default
s = json.dumps(obj)
else:
s = json.dumps(obj, for_json=for_json)
self.assertEqual(
json.loads(s),
other)
def test_for_json_encodes_stand_alone_object(self):
self.assertRoundTrip(
ForJson(),
ForJson().for_json())
def test_for_json_encodes_object_nested_in_dict(self):
self.assertRoundTrip(
{'hooray': ForJson()},
{'hooray': ForJson().for_json()})
def test_for_json_encodes_object_nested_in_list_within_dict(self):
self.assertRoundTrip(
{'list': [0, ForJson(), 2, 3]},
{'list': [0, ForJson().for_json(), 2, 3]})
def test_for_json_encodes_object_nested_within_object(self):
self.assertRoundTrip(
NestedForJson(),
{'nested': {'for_json': 1}})
def test_for_json_encodes_list(self):
self.assertRoundTrip(
ForJsonList(),
ForJsonList().for_json())
def test_for_json_encodes_list_within_object(self):
self.assertRoundTrip(
{'nested': ForJsonList()},
{'nested': ForJsonList().for_json()})
def test_for_json_encodes_dict_subclass(self):
self.assertRoundTrip(
DictForJson(a=1),
DictForJson(a=1).for_json())
def test_for_json_encodes_list_subclass(self):
self.assertRoundTrip(
ListForJson(['l']),
ListForJson(['l']).for_json())
def test_for_json_ignored_if_not_true_with_dict_subclass(self):
for for_json in (None, False):
self.assertRoundTrip(
DictForJson(a=1),
{'a': 1},
for_json=for_json)
def test_for_json_ignored_if_not_true_with_list_subclass(self):
for for_json in (None, False):
self.assertRoundTrip(
ListForJson(['l']),
['l'],
for_json=for_json)
def test_raises_typeerror_if_for_json_not_true_with_object(self):
self.assertRaises(TypeError, json.dumps, ForJson())
self.assertRaises(TypeError, json.dumps, ForJson(), for_json=False)
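
The protocol under test, in sketch form: with for_json=True, any object exposing a callable for_json() is serialized as whatever that method returns. The Temperature class here is purely illustrative.

import simplejson as json

class Temperature(object):
    def __init__(self, celsius):
        self.celsius = celsius
    def for_json(self):
        return {'celsius': self.celsius}

print(json.dumps(Temperature(21.5), for_json=True))  # {"celsius": 21.5}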

View File

@ -0,0 +1,86 @@
from unittest import TestCase
import textwrap
import simplejson as json
from simplejson.compat import StringIO
class TestIndent(TestCase):
def test_indent(self):
h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh',
'i-vhbjkhnth',
{'nifty': 87}, {'field': 'yes', 'morefield': False} ]
expect = textwrap.dedent("""\
[
\t[
\t\t"blorpie"
\t],
\t[
\t\t"whoops"
\t],
\t[],
\t"d-shtaeou",
\t"d-nthiouh",
\t"i-vhbjkhnth",
\t{
\t\t"nifty": 87
\t},
\t{
\t\t"field": "yes",
\t\t"morefield": false
\t}
]""")
d1 = json.dumps(h)
d2 = json.dumps(h, indent='\t', sort_keys=True, separators=(',', ': '))
        d3 = json.dumps(h, indent='  ', sort_keys=True, separators=(',', ': '))
d4 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
h1 = json.loads(d1)
h2 = json.loads(d2)
h3 = json.loads(d3)
h4 = json.loads(d4)
self.assertEqual(h1, h)
self.assertEqual(h2, h)
self.assertEqual(h3, h)
self.assertEqual(h4, h)
        self.assertEqual(d3, expect.replace('\t', '  '))
        self.assertEqual(d4, expect.replace('\t', '  '))
# NOTE: Python 2.4 textwrap.dedent converts tabs to spaces,
# so the following is expected to fail. Python 2.4 is not a
# supported platform in simplejson 2.1.0+.
self.assertEqual(d2, expect)
def test_indent0(self):
h = {3: 1}
def check(indent, expected):
d1 = json.dumps(h, indent=indent)
self.assertEqual(d1, expected)
sio = StringIO()
json.dump(h, sio, indent=indent)
self.assertEqual(sio.getvalue(), expected)
# indent=0 should emit newlines
check(0, '{\n"3": 1\n}')
# indent=None is more compact
check(None, '{"3": 1}')
def test_separators(self):
lst = [1,2,3,4]
expect = '[\n1,\n2,\n3,\n4\n]'
expect_spaces = '[\n1, \n2, \n3, \n4\n]'
# Ensure that separators still works
self.assertEqual(
expect_spaces,
json.dumps(lst, indent=0, separators=(', ', ': ')))
# Force the new defaults
self.assertEqual(
expect,
json.dumps(lst, indent=0, separators=(',', ': ')))
# Added in 2.1.4
self.assertEqual(
expect,
json.dumps(lst, indent=0))
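
Summing up the indent semantics asserted above, assuming the vendored simplejson: indent=None is compact, indent=0 emits newlines with zero-width indentation, and since 2.1.4 an explicit indent implies the compact (',', ': ') separators.

import simplejson as json

print(json.dumps([1, 2], indent=None))  # [1, 2]
print(json.dumps([1, 2], indent=0))     # [\n1,\n2\n]
print(json.dumps({'3': 1}, indent=2))   # {\n  "3": 1\n}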

View File

@ -0,0 +1,20 @@
from unittest import TestCase
import simplejson as json
from operator import itemgetter
class TestItemSortKey(TestCase):
def test_simple_first(self):
a = {'a': 1, 'c': 5, 'jack': 'jill', 'pick': 'axe', 'array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'}
self.assertEqual(
'{"a": 1, "c": 5, "crate": "dog", "jack": "jill", "pick": "axe", "zeak": "oh", "array": [1, 5, 6, 9], "tuple": [83, 12, 3]}',
json.dumps(a, item_sort_key=json.simple_first))
def test_case(self):
a = {'a': 1, 'c': 5, 'Jack': 'jill', 'pick': 'axe', 'Array': [1, 5, 6, 9], 'tuple': (83, 12, 3), 'crate': 'dog', 'zeak': 'oh'}
self.assertEqual(
'{"Array": [1, 5, 6, 9], "Jack": "jill", "a": 1, "c": 5, "crate": "dog", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}',
json.dumps(a, item_sort_key=itemgetter(0)))
self.assertEqual(
'{"a": 1, "Array": [1, 5, 6, 9], "c": 5, "crate": "dog", "Jack": "jill", "pick": "axe", "tuple": [83, 12, 3], "zeak": "oh"}',
json.dumps(a, item_sort_key=lambda kv: kv[0].lower()))

View File

@ -0,0 +1,122 @@
import unittest
import simplejson as json
from simplejson.compat import StringIO
try:
from collections import namedtuple
except ImportError:
class Value(tuple):
def __new__(cls, *args):
return tuple.__new__(cls, args)
def _asdict(self):
return {'value': self[0]}
class Point(tuple):
def __new__(cls, *args):
return tuple.__new__(cls, args)
def _asdict(self):
return {'x': self[0], 'y': self[1]}
else:
Value = namedtuple('Value', ['value'])
Point = namedtuple('Point', ['x', 'y'])
class DuckValue(object):
def __init__(self, *args):
self.value = Value(*args)
def _asdict(self):
return self.value._asdict()
class DuckPoint(object):
def __init__(self, *args):
self.point = Point(*args)
def _asdict(self):
return self.point._asdict()
class DeadDuck(object):
_asdict = None
class DeadDict(dict):
_asdict = None
CONSTRUCTORS = [
lambda v: v,
lambda v: [v],
lambda v: [{'key': v}],
]
class TestNamedTuple(unittest.TestCase):
def test_namedtuple_dumps(self):
for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]:
d = v._asdict()
self.assertEqual(d, json.loads(json.dumps(v)))
self.assertEqual(
d,
json.loads(json.dumps(v, namedtuple_as_object=True)))
self.assertEqual(d, json.loads(json.dumps(v, tuple_as_array=False)))
self.assertEqual(
d,
json.loads(json.dumps(v, namedtuple_as_object=True,
tuple_as_array=False)))
def test_namedtuple_dumps_false(self):
for v in [Value(1), Point(1, 2)]:
l = list(v)
self.assertEqual(
l,
json.loads(json.dumps(v, namedtuple_as_object=False)))
self.assertRaises(TypeError, json.dumps, v,
tuple_as_array=False, namedtuple_as_object=False)
def test_namedtuple_dump(self):
for v in [Value(1), Point(1, 2), DuckValue(1), DuckPoint(1, 2)]:
d = v._asdict()
sio = StringIO()
json.dump(v, sio)
self.assertEqual(d, json.loads(sio.getvalue()))
sio = StringIO()
json.dump(v, sio, namedtuple_as_object=True)
self.assertEqual(
d,
json.loads(sio.getvalue()))
sio = StringIO()
json.dump(v, sio, tuple_as_array=False)
self.assertEqual(d, json.loads(sio.getvalue()))
sio = StringIO()
json.dump(v, sio, namedtuple_as_object=True,
tuple_as_array=False)
self.assertEqual(
d,
json.loads(sio.getvalue()))
def test_namedtuple_dump_false(self):
for v in [Value(1), Point(1, 2)]:
l = list(v)
sio = StringIO()
json.dump(v, sio, namedtuple_as_object=False)
self.assertEqual(
l,
json.loads(sio.getvalue()))
self.assertRaises(TypeError, json.dump, v, StringIO(),
tuple_as_array=False, namedtuple_as_object=False)
def test_asdict_not_callable_dump(self):
for f in CONSTRUCTORS:
self.assertRaises(TypeError,
json.dump, f(DeadDuck()), StringIO(), namedtuple_as_object=True)
sio = StringIO()
json.dump(f(DeadDict()), sio, namedtuple_as_object=True)
self.assertEqual(
json.dumps(f({})),
sio.getvalue())
def test_asdict_not_callable_dumps(self):
for f in CONSTRUCTORS:
self.assertRaises(TypeError,
json.dumps, f(DeadDuck()), namedtuple_as_object=True)
self.assertEqual(
json.dumps(f({})),
json.dumps(f(DeadDict()), namedtuple_as_object=True))
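
A sketch of the defaults pinned down above: anything with a callable _asdict() (namedtuples included) becomes a JSON object unless namedtuple_as_object=False, in which case plain tuple handling applies.

from collections import namedtuple
import simplejson as json

Point = namedtuple('Point', ['x', 'y'])
print(json.dumps(Point(1, 2)))                              # {"x": 1, "y": 2}
print(json.dumps(Point(1, 2), namedtuple_as_object=False))  # [1, 2]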

View File

@ -0,0 +1,71 @@
from unittest import TestCase
import simplejson as json
# from http://json.org/JSON_checker/test/pass1.json
JSON = r'''
[
"JSON Test Pattern pass1",
{"object with 1 member":["array with 1 element"]},
{},
[],
-42,
true,
false,
null,
{
"integer": 1234567890,
"real": -9876.543210,
"e": 0.123456789e-12,
"E": 1.234567890E+34,
"": 23456789012E66,
"zero": 0,
"one": 1,
"space": " ",
"quote": "\"",
"backslash": "\\",
"controls": "\b\f\n\r\t",
"slash": "/ & \/",
"alpha": "abcdefghijklmnopqrstuvwyz",
"ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
"digit": "0123456789",
"special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
"hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
"true": true,
"false": false,
"null": null,
"array":[ ],
"object":{ },
"address": "50 St. James Street",
"url": "http://www.JSON.org/",
"comment": "// /* <!-- --",
"# -- --> */": " ",
" s p a c e d " :[1,2 , 3
,
4 , 5 , 6 ,7 ],"compact": [1,2,3,4,5,6,7],
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
"quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
: "A key can be any string"
},
0.5 ,98.6
,
99.44
,
1066,
1e1,
0.1e1,
1e-1,
1e00,2e+00,2e-00
,"rosebud"]
'''
class TestPass1(TestCase):
def test_parse(self):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
self.assertEqual(res, json.loads(out))

View File

@ -0,0 +1,14 @@
from unittest import TestCase
import simplejson as json
# from http://json.org/JSON_checker/test/pass2.json
JSON = r'''
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
'''
class TestPass2(TestCase):
def test_parse(self):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
self.assertEqual(res, json.loads(out))

View File

@ -0,0 +1,20 @@
from unittest import TestCase
import simplejson as json
# from http://json.org/JSON_checker/test/pass3.json
JSON = r'''
{
"JSON Test Pattern pass3": {
"The outermost value": "must be an object or array.",
"In this test": "It is an object."
}
}
'''
class TestPass3(TestCase):
def test_parse(self):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
self.assertEqual(res, json.loads(out))

View File

@ -0,0 +1,67 @@
from unittest import TestCase
import simplejson as json
class JSONTestObject:
pass
class RecursiveJSONEncoder(json.JSONEncoder):
recurse = False
def default(self, o):
if o is JSONTestObject:
if self.recurse:
return [JSONTestObject]
else:
return 'JSONTestObject'
        return json.JSONEncoder.default(self, o)
class TestRecursion(TestCase):
def test_listrecursion(self):
x = []
x.append(x)
try:
json.dumps(x)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on list recursion")
x = []
y = [x]
x.append(y)
try:
json.dumps(x)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on alternating list recursion")
y = []
x = [y, y]
# ensure that the marker is cleared
json.dumps(x)
def test_dictrecursion(self):
x = {}
x["test"] = x
try:
json.dumps(x)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on dict recursion")
x = {}
y = {"a": x, "b": x}
# ensure that the marker is cleared
json.dumps(y)
def test_defaultrecursion(self):
enc = RecursiveJSONEncoder()
self.assertEqual(enc.encode(JSONTestObject), '"JSONTestObject"')
enc.recurse = True
try:
enc.encode(JSONTestObject)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on default recursion")

View File

@ -0,0 +1,194 @@
import sys
from unittest import TestCase
import simplejson as json
import simplejson.decoder
from simplejson.compat import b, PY3
class TestScanString(TestCase):
# The bytes type is intentionally not used in most of these tests
# under Python 3 because the decoder immediately coerces to str before
# calling scanstring. In Python 2 we are testing the code paths
# for both unicode and str.
#
# The reason this is done is because Python 3 would require
# entirely different code paths for parsing bytes and str.
#
def test_py_scanstring(self):
self._test_scanstring(simplejson.decoder.py_scanstring)
def test_c_scanstring(self):
if not simplejson.decoder.c_scanstring:
return
self._test_scanstring(simplejson.decoder.c_scanstring)
def _test_scanstring(self, scanstring):
if sys.maxunicode == 65535:
self.assertEqual(
scanstring('"z\U0001d120x"', 1, None, True),
('z\U0001d120x', 6))
else:
self.assertEqual(
scanstring('"z\U0001d120x"', 1, None, True),
('z\U0001d120x', 5))
self.assertEqual(
scanstring('"\\u007b"', 1, None, True),
('{', 8))
self.assertEqual(
scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
('A JSON payload should be an object or array, not a string.', 60))
self.assertEqual(
scanstring('["Unclosed array"', 2, None, True),
('Unclosed array', 17))
self.assertEqual(
scanstring('["extra comma",]', 2, None, True),
('extra comma', 14))
self.assertEqual(
scanstring('["double extra comma",,]', 2, None, True),
('double extra comma', 21))
self.assertEqual(
scanstring('["Comma after the close"],', 2, None, True),
('Comma after the close', 24))
self.assertEqual(
scanstring('["Extra close"]]', 2, None, True),
('Extra close', 14))
self.assertEqual(
scanstring('{"Extra comma": true,}', 2, None, True),
('Extra comma', 14))
self.assertEqual(
scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True),
('Extra value after close', 26))
self.assertEqual(
scanstring('{"Illegal expression": 1 + 2}', 2, None, True),
('Illegal expression', 21))
self.assertEqual(
scanstring('{"Illegal invocation": alert()}', 2, None, True),
('Illegal invocation', 21))
self.assertEqual(
scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True),
('Numbers cannot have leading zeroes', 37))
self.assertEqual(
scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True),
('Numbers cannot be hex', 24))
self.assertEqual(
scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True),
('Too deep', 30))
self.assertEqual(
scanstring('{"Missing colon" null}', 2, None, True),
('Missing colon', 16))
self.assertEqual(
scanstring('{"Double colon":: null}', 2, None, True),
('Double colon', 15))
self.assertEqual(
scanstring('{"Comma instead of colon", null}', 2, None, True),
('Comma instead of colon', 25))
self.assertEqual(
scanstring('["Colon instead of comma": false]', 2, None, True),
('Colon instead of comma', 25))
self.assertEqual(
scanstring('["Bad value", truth]', 2, None, True),
('Bad value', 12))
for c in map(chr, list(range(0x00, 0x1f))):
self.assertEqual(
scanstring(c + '"', 0, None, False),
(c, 2))
self.assertRaises(
ValueError,
scanstring, c + '"', 0, None, True)
self.assertRaises(ValueError, scanstring, '', 0, None, True)
self.assertRaises(ValueError, scanstring, 'a', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True)
if sys.maxunicode > 65535:
self.assertRaises(ValueError,
scanstring, '\\ud834\\u"', 0, None, True)
self.assertRaises(ValueError,
scanstring, '\\ud834\\x0123"', 0, None, True)
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
"xxx")
self.assertRaises(UnicodeDecodeError,
json.encoder.encode_basestring_ascii, b("xx\xff"))
def test_overflow(self):
# Python 2.5 does not have maxsize, Python 3 does not have maxint
maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None))
assert maxsize is not None
self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
maxsize + 1)
def test_surrogates(self):
scanstring = json.decoder.scanstring
def assertScan(given, expect, test_utf8=True):
givens = [given]
if not PY3 and test_utf8:
givens.append(given.encode('utf8'))
for given in givens:
(res, count) = scanstring(given, 1, None, True)
self.assertEqual(len(given), count)
self.assertEqual(res, expect)
assertScan(
'"z\\ud834\\u0079x"',
'z\ud834yx')
assertScan(
'"z\\ud834\\udd20x"',
'z\U0001d120x')
assertScan(
'"z\\ud834\\ud834\\udd20x"',
'z\ud834\U0001d120x')
assertScan(
'"z\\ud834x"',
'z\ud834x')
assertScan(
'"z\\udd20x"',
'z\udd20x')
assertScan(
'"z\ud834x"',
'z\ud834x')
# It may look strange to join strings together, but Python is drunk.
# https://gist.github.com/etrepum/5538443
assertScan(
'"z\\ud834\udd20x12345"',
''.join(['z\ud834', '\udd20x12345']))
assertScan(
'"z\ud834\\udd20x"',
''.join(['z\ud834', '\udd20x']))
# these have different behavior given UTF8 input, because the surrogate
# pair may be joined (in maxunicode > 65535 builds)
assertScan(
''.join(['"z\ud834', '\udd20x"']),
''.join(['z\ud834', '\udd20x']),
test_utf8=False)
self.assertRaises(ValueError,
scanstring, '"z\\ud83x"', 1, None, True)
self.assertRaises(ValueError,
scanstring, '"z\\ud834\\udd2x"', 1, None, True)
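
For orientation, the scanstring signature used throughout these tests: scanstring(s, end, encoding, strict) starts just past an opening quote and returns the decoded string plus the index one past the closing quote.

import simplejson.decoder

print(simplejson.decoder.scanstring('"hello" tail', 1, None, True))  # ('hello', 7)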

View File

@ -0,0 +1,42 @@
import textwrap
from unittest import TestCase
import simplejson as json
class TestSeparators(TestCase):
def test_separators(self):
h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
{'nifty': 87}, {'field': 'yes', 'morefield': False} ]
expect = textwrap.dedent("""\
[
[
"blorpie"
] ,
[
"whoops"
] ,
[] ,
"d-shtaeou" ,
"d-nthiouh" ,
"i-vhbjkhnth" ,
{
"nifty" : 87
} ,
{
"field" : "yes" ,
"morefield" : false
}
]""")
d1 = json.dumps(h)
        d2 = json.dumps(h, indent='  ', sort_keys=True, separators=(' ,', ' : '))
h1 = json.loads(d1)
h2 = json.loads(d2)
self.assertEqual(h1, h)
self.assertEqual(h2, h)
self.assertEqual(d2, expect)

View File

@ -0,0 +1,20 @@
from unittest import TestCase
from simplejson import encoder, scanner
def has_speedups():
return encoder.c_make_encoder is not None
class TestDecode(TestCase):
def test_make_scanner(self):
if not has_speedups():
return
self.assertRaises(AttributeError, scanner.c_make_scanner, 1)
def test_make_encoder(self):
if not has_speedups():
return
self.assertRaises(TypeError, encoder.c_make_encoder,
None,
"\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75",
None)

View File

@ -0,0 +1,97 @@
import os
import sys
import textwrap
import unittest
import subprocess
import tempfile
try:
# Python 3.x
from test.support import strip_python_stderr
except ImportError:
# Python 2.6+
try:
from test.test_support import strip_python_stderr
except ImportError:
# Python 2.5
import re
def strip_python_stderr(stderr):
return re.sub(
r"\[\d+ refs\]\r?\n?$".encode(),
"".encode(),
stderr).strip()
class TestTool(unittest.TestCase):
data = """
[["blorpie"],[ "whoops" ] , [
],\t"d-shtaeou",\r"d-nthiouh",
"i-vhbjkhnth", {"nifty":87}, {"morefield" :\tfalse,"field"
:"yes"} ]
"""
expect = textwrap.dedent("""\
[
[
"blorpie"
],
[
"whoops"
],
[],
"d-shtaeou",
"d-nthiouh",
"i-vhbjkhnth",
{
"nifty": 87
},
{
"field": "yes",
"morefield": false
}
]
""")
def runTool(self, args=None, data=None):
argv = [sys.executable, '-m', 'simplejson.tool']
if args:
argv.extend(args)
proc = subprocess.Popen(argv,
stdin=subprocess.PIPE,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = proc.communicate(data)
self.assertEqual(strip_python_stderr(err), ''.encode())
self.assertEqual(proc.returncode, 0)
return out
def test_stdin_stdout(self):
self.assertEqual(
self.runTool(data=self.data.encode()),
self.expect.encode())
def test_infile_stdout(self):
with tempfile.NamedTemporaryFile() as infile:
infile.write(self.data.encode())
infile.flush()
self.assertEqual(
self.runTool(args=[infile.name]),
self.expect.encode())
def test_infile_outfile(self):
with tempfile.NamedTemporaryFile() as infile:
infile.write(self.data.encode())
infile.flush()
# outfile will get overwritten by tool, so the delete
# may not work on some platforms. Do it manually.
outfile = tempfile.NamedTemporaryFile()
try:
self.assertEqual(
self.runTool(args=[infile.name, outfile.name]),
''.encode())
with open(outfile.name, 'rb') as f:
self.assertEqual(f.read(), self.expect.encode())
finally:
outfile.close()
if os.path.exists(outfile.name):
os.unlink(outfile.name)

View File

@ -0,0 +1,51 @@
import unittest
from simplejson.compat import StringIO
import simplejson as json
class TestTuples(unittest.TestCase):
def test_tuple_array_dumps(self):
t = (1, 2, 3)
expect = json.dumps(list(t))
# Default is True
self.assertEqual(expect, json.dumps(t))
self.assertEqual(expect, json.dumps(t, tuple_as_array=True))
self.assertRaises(TypeError, json.dumps, t, tuple_as_array=False)
# Ensure that the "default" does not get called
self.assertEqual(expect, json.dumps(t, default=repr))
self.assertEqual(expect, json.dumps(t, tuple_as_array=True,
default=repr))
# Ensure that the "default" gets called
self.assertEqual(
json.dumps(repr(t)),
json.dumps(t, tuple_as_array=False, default=repr))
def test_tuple_array_dump(self):
t = (1, 2, 3)
expect = json.dumps(list(t))
# Default is True
sio = StringIO()
json.dump(t, sio)
self.assertEqual(expect, sio.getvalue())
sio = StringIO()
json.dump(t, sio, tuple_as_array=True)
self.assertEqual(expect, sio.getvalue())
self.assertRaises(TypeError, json.dump, t, StringIO(),
tuple_as_array=False)
# Ensure that the "default" does not get called
sio = StringIO()
json.dump(t, sio, default=repr)
self.assertEqual(expect, sio.getvalue())
sio = StringIO()
json.dump(t, sio, tuple_as_array=True, default=repr)
self.assertEqual(expect, sio.getvalue())
# Ensure that the "default" gets called
sio = StringIO()
json.dump(t, sio, tuple_as_array=False, default=repr)
self.assertEqual(
json.dumps(repr(t)),
sio.getvalue())
class TestNamedTuple(unittest.TestCase):
def test_namedtuple_dump(self):
pass
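
The interplay tested above, in brief: tuple_as_array=True (the default) takes precedence over any default= fallback, so only with tuple_as_array=False does the default hook ever see the tuple.

import simplejson as json

t = (1, 2, 3)
print(json.dumps(t))                                      # [1, 2, 3]
print(json.dumps(t, tuple_as_array=False, default=repr))  # "(1, 2, 3)"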

View File

@ -0,0 +1,145 @@
import sys
from unittest import TestCase
import simplejson as json
from simplejson.compat import chr, text_type, b, u
class TestUnicode(TestCase):
def test_encoding1(self):
encoder = json.JSONEncoder(encoding='utf-8')
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
s = u.encode('utf-8')
ju = encoder.encode(u)
js = encoder.encode(s)
self.assertEqual(ju, js)
def test_encoding2(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
s = u.encode('utf-8')
ju = json.dumps(u, encoding='utf-8')
js = json.dumps(s, encoding='utf-8')
self.assertEqual(ju, js)
def test_encoding3(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps(u)
self.assertEqual(j, '"\\u03b1\\u03a9"')
def test_encoding4(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps([u])
self.assertEqual(j, '["\\u03b1\\u03a9"]')
def test_encoding5(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps(u, ensure_ascii=False)
self.assertEqual(j, '"' + u + '"')
def test_encoding6(self):
u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps([u], ensure_ascii=False)
self.assertEqual(j, '["' + u + '"]')
def test_big_unicode_encode(self):
u = '\U0001d120'
self.assertEqual(json.dumps(u), '"\\ud834\\udd20"')
self.assertEqual(json.dumps(u, ensure_ascii=False), '"\U0001d120"')
def test_big_unicode_decode(self):
u = 'z\U0001d120x'
self.assertEqual(json.loads('"' + u + '"'), u)
self.assertEqual(json.loads('"z\\ud834\\udd20x"'), u)
def test_unicode_decode(self):
for i in range(0, 0xd7ff):
u = chr(i)
#s = '"\\u{0:04x}"'.format(i)
s = '"\\u%04x"' % (i,)
self.assertEqual(json.loads(s), u)
def test_object_pairs_hook_with_unicode(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
("qrt", 5), ("pad", 6), ("hoy", 7)]
self.assertEqual(json.loads(s), eval(s))
self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p)
od = json.loads(s, object_pairs_hook=json.OrderedDict)
self.assertEqual(od, json.OrderedDict(p))
self.assertEqual(type(od), json.OrderedDict)
# the object_pairs_hook takes priority over the object_hook
self.assertEqual(json.loads(s,
object_pairs_hook=json.OrderedDict,
object_hook=lambda x: None),
json.OrderedDict(p))
def test_default_encoding(self):
self.assertEqual(json.loads('{"a": "\xe9"}'.encode('utf-8')),
{'a': '\xe9'})
def test_unicode_preservation(self):
self.assertEqual(type(json.loads('""')), text_type)
self.assertEqual(type(json.loads('"a"')), text_type)
self.assertEqual(type(json.loads('["a"]')[0]), text_type)
def test_ensure_ascii_false_returns_unicode(self):
# http://code.google.com/p/simplejson/issues/detail?id=48
self.assertEqual(type(json.dumps([], ensure_ascii=False)), text_type)
self.assertEqual(type(json.dumps(0, ensure_ascii=False)), text_type)
self.assertEqual(type(json.dumps({}, ensure_ascii=False)), text_type)
self.assertEqual(type(json.dumps("", ensure_ascii=False)), text_type)
def test_ensure_ascii_false_bytestring_encoding(self):
# http://code.google.com/p/simplejson/issues/detail?id=48
doc1 = {'quux': b('Arr\xc3\xaat sur images')}
doc2 = {'quux': u('Arr\xeat sur images')}
doc_ascii = '{"quux": "Arr\\u00eat sur images"}'
doc_unicode = '{"quux": "Arr\xeat sur images"}'
self.assertEqual(json.dumps(doc1), doc_ascii)
self.assertEqual(json.dumps(doc2), doc_ascii)
self.assertEqual(json.dumps(doc1, ensure_ascii=False), doc_unicode)
self.assertEqual(json.dumps(doc2, ensure_ascii=False), doc_unicode)
def test_ensure_ascii_linebreak_encoding(self):
# http://timelessrepo.com/json-isnt-a-javascript-subset
s1 = '\u2029\u2028'
s2 = s1.encode('utf8')
expect = '"\\u2029\\u2028"'
self.assertEqual(json.dumps(s1), expect)
self.assertEqual(json.dumps(s2), expect)
self.assertEqual(json.dumps(s1, ensure_ascii=False), expect)
self.assertEqual(json.dumps(s2, ensure_ascii=False), expect)
def test_invalid_escape_sequences(self):
# incomplete escape sequence
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234')
# invalid escape sequence
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"')
if sys.maxunicode > 65535:
# invalid escape sequence for low surrogate
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"')
def test_ensure_ascii_still_works(self):
# in the ascii range, ensure that everything is the same
for c in map(chr, list(range(0, 127))):
self.assertEqual(
json.dumps(c, ensure_ascii=False),
json.dumps(c))
        snowman = '\N{SNOWMAN}'
        self.assertEqual(
            json.dumps(snowman, ensure_ascii=False),
            '"' + snowman + '"')
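
The ensure_ascii contract these tests cover, sketched under the same assumptions: the default escapes everything outside ASCII, while ensure_ascii=False returns a text string with the characters intact.

import simplejson as json

s = '\u03b1\u03a9'
print(json.dumps(s))                      # "\u03b1\u03a9"
print(json.dumps(s, ensure_ascii=False))  # the two Greek letters, quoted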

View File

@ -0,0 +1,42 @@
r"""Command-line tool to validate and pretty-print JSON
Usage::
$ echo '{"json":"obj"}' | python -m simplejson.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
Expecting property name: line 1 column 2 (char 2)
"""
import sys
import simplejson as json
def main():
if len(sys.argv) == 1:
infile = sys.stdin
outfile = sys.stdout
elif len(sys.argv) == 2:
infile = open(sys.argv[1], 'r')
outfile = sys.stdout
elif len(sys.argv) == 3:
infile = open(sys.argv[1], 'r')
outfile = open(sys.argv[2], 'w')
else:
raise SystemExit(sys.argv[0] + " [infile [outfile]]")
with infile:
try:
obj = json.load(infile,
object_pairs_hook=json.OrderedDict,
use_decimal=True)
except ValueError:
raise SystemExit(sys.exc_info()[1])
with outfile:
        json.dump(obj, outfile, sort_keys=True, indent='    ', use_decimal=True)
outfile.write('\n')
if __name__ == '__main__':
main()

View File

@ -0,0 +1,6 @@
import sys
if sys.platform == 'win32':
from tzlocal.win32 import get_localzone, reload_localzone
else:
from tzlocal.unix import get_localzone, reload_localzone

View File

@ -0,0 +1,64 @@
import sys
import os
from datetime import datetime
import unittest
import pytz3 as pytz
import tzlocal.unix
class TzLocalTests(unittest.TestCase):
def test_env(self):
tz_harare = tzlocal.unix._tz_from_env(':Africa/Harare')
self.assertEqual(tz_harare.zone, 'Africa/Harare')
# Some Unices allow this as well, so we must allow it:
tz_harare = tzlocal.unix._tz_from_env('Africa/Harare')
self.assertEqual(tz_harare.zone, 'Africa/Harare')
local_path = os.path.split(__file__)[0]
tz_local = tzlocal.unix._tz_from_env(':' + os.path.join(local_path, 'test_data', 'Harare'))
self.assertEqual(tz_local.zone, 'local')
# Make sure the local timezone is the same as the Harare one above.
# We test this with a past date, so that we don't run into future changes
# of the Harare timezone.
dt = datetime(2012, 1, 1, 5)
self.assertEqual(tz_harare.localize(dt), tz_local.localize(dt))
# Non-zoneinfo timezones are not supported in the TZ environment.
self.assertRaises(pytz.UnknownTimeZoneError, tzlocal.unix._tz_from_env, 'GMT+03:00')
def test_timezone(self):
# Most versions of Ubuntu
local_path = os.path.split(__file__)[0]
tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'timezone'))
self.assertEqual(tz.zone, 'Africa/Harare')
def test_zone_setting(self):
        # A ZONE setting in /etc/sysconfig/clock, e.g. CentOS
local_path = os.path.split(__file__)[0]
tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'zone_setting'))
self.assertEqual(tz.zone, 'Africa/Harare')
def test_timezone_setting(self):
        # A TIMEZONE setting in /etc/conf.d/clock, e.g. Gentoo
local_path = os.path.split(__file__)[0]
tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'timezone_setting'))
self.assertEqual(tz.zone, 'Africa/Harare')
def test_only_localtime(self):
local_path = os.path.split(__file__)[0]
tz = tzlocal.unix._get_localzone(_root=os.path.join(local_path, 'test_data', 'localtime'))
self.assertEqual(tz.zone, 'local')
dt = datetime(2012, 1, 1, 5)
self.assertEqual(pytz.timezone('Africa/Harare').localize(dt), tz.localize(dt))
if sys.platform == 'win32':
import tzlocal.win32
class TzWin32Tests(unittest.TestCase):
def test_win32(self):
tz = tzlocal.win32.get_localzone()
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,113 @@
import os
import re
import pytz3 as pytz
_cache_tz = None
def _tz_from_env(tzenv):
if tzenv[0] == ':':
tzenv = tzenv[1:]
# TZ specifies a file
if os.path.exists(tzenv):
with open(tzenv, 'rb') as tzfile:
return pytz.tzfile.build_tzinfo('local', tzfile)
# TZ specifies a zoneinfo zone.
try:
tz = pytz.timezone(tzenv)
# That worked, so we return this:
return tz
except pytz.UnknownTimeZoneError:
        raise pytz.UnknownTimeZoneError(
            "tzlocal() does not support non-zoneinfo timezones like %s. \n"
            "Please use a timezone in the form of Continent/City" % tzenv)
def _get_localzone(_root='/'):
"""Tries to find the local timezone configuration.
This method prefers finding the timezone name and passing that to pytz,
    over passing in the localtime file, as in the latter case the zoneinfo
name is unknown.
The parameter _root makes the function look for files like /etc/localtime
beneath the _root directory. This is primarily used by the tests.
In normal usage you call the function without parameters."""
tzenv = os.environ.get('TZ')
if tzenv:
return _tz_from_env(tzenv)
# Now look for distribution specific configuration files
# that contain the timezone name.
tzpath = os.path.join(_root, 'etc/timezone')
if os.path.exists(tzpath):
with open(tzpath, 'rb') as tzfile:
data = tzfile.read()
# Issue #3 was that /etc/timezone was a zoneinfo file.
# That's a misconfiguration, but we need to handle it gracefully:
            if data[:5] != b'TZif2':  # data was read in binary mode; compare bytes
etctz = data.strip().decode()
# Get rid of host definitions and comments:
if ' ' in etctz:
etctz, dummy = etctz.split(' ', 1)
if '#' in etctz:
etctz, dummy = etctz.split('#', 1)
return pytz.timezone(etctz.replace(' ', '_'))
# CentOS has a ZONE setting in /etc/sysconfig/clock,
# OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and
# Gentoo has a TIMEZONE setting in /etc/conf.d/clock
# We look through these files for a timezone:
    zone_re = re.compile(r'\s*ZONE\s*=\s*"')
    timezone_re = re.compile(r'\s*TIMEZONE\s*=\s*"')
    end_re = re.compile('"')
for filename in ('etc/sysconfig/clock', 'etc/conf.d/clock'):
tzpath = os.path.join(_root, filename)
if not os.path.exists(tzpath):
continue
with open(tzpath, 'rt') as tzfile:
data = tzfile.readlines()
for line in data:
# Look for the ZONE= setting.
match = zone_re.match(line)
if match is None:
# No ZONE= setting. Look for the TIMEZONE= setting.
match = timezone_re.match(line)
if match is not None:
# Some setting existed
line = line[match.end():]
etctz = line[:end_re.search(line).start()]
# We found a timezone
return pytz.timezone(etctz.replace(' ', '_'))
# No explicit setting existed. Use localtime
for filename in ('etc/localtime', 'usr/local/etc/localtime'):
tzpath = os.path.join(_root, filename)
if not os.path.exists(tzpath):
continue
with open(tzpath, 'rb') as tzfile:
return pytz.tzfile.build_tzinfo('local', tzfile)
raise pytz.UnknownTimeZoneError('Can not find any timezone configuration')
def get_localzone():
    """Get the computer's configured local timezone, if any."""
global _cache_tz
if _cache_tz is None:
_cache_tz = _get_localzone()
return _cache_tz
def reload_localzone():
"""Reload the cached localzone. You need to call this if the timezone has changed."""
global _cache_tz
_cache_tz = _get_localzone()
return _cache_tz
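
Typical use of this module, assuming the import paths as vendored in this commit (the tzlocal package backed by the pytz3 copy on Python 3); the zone name is system-dependent:

from tzlocal import get_localzone, reload_localzone

tz = get_localzone()     # resolved once, then cached in _cache_tz
print(tz.zone)           # e.g. 'Africa/Harare' on the fixtures used above
tz = reload_localzone()  # re-reads the configuration after a system change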

View File

@ -0,0 +1,88 @@
try:
    import _winreg as winreg  # Python 2
except ImportError:
    import winreg  # Python 3
from tzlocal.windows_tz import tz_names
import pytz3 as pytz
_cache_tz = None
def valuestodict(key):
    """Convert a registry key's values to a dictionary."""
    result = {}  # avoid shadowing the built-in dict
    size = winreg.QueryInfoKey(key)[1]
    for i in range(size):
        data = winreg.EnumValue(key, i)
        result[data[0]] = data[1]
    return result
def get_localzone_name():
# Windows is special. It has unique time zone names (in several
# meanings of the word) available, but unfortunately, they can be
# translated to the language of the operating system, so we need to
# do a backwards lookup, by going through all time zones and see which
# one matches.
handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
localtz = winreg.OpenKey(handle, TZLOCALKEYNAME)
keyvalues = valuestodict(localtz)
localtz.Close()
if 'TimeZoneKeyName' in keyvalues:
# Windows 7 (and Vista?)
# For some reason this returns a string with loads of NUL bytes at
# least on some systems. I don't know if this is a bug somewhere, I
# just work around it.
tzkeyname = keyvalues['TimeZoneKeyName'].split('\x00', 1)[0]
else:
# Windows 2000 or XP
# This is the localized name:
tzwin = keyvalues['StandardName']
# Open the list of timezones to look up the real name:
TZKEYNAME = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
tzkey = winreg.OpenKey(handle, TZKEYNAME)
# Now, match this value to Time Zone information
tzkeyname = None
for i in range(winreg.QueryInfoKey(tzkey)[0]):
subkey = winreg.EnumKey(tzkey, i)
sub = winreg.OpenKey(tzkey, subkey)
data = valuestodict(sub)
sub.Close()
if data['Std'] == tzwin:
tzkeyname = subkey
break
tzkey.Close()
handle.Close()
if tzkeyname is None:
raise LookupError('Can not find Windows timezone configuration')
timezone = tz_names.get(tzkeyname)
if timezone is None:
# Nope, that didn't work. Try adding "Standard Time",
# it seems to work a lot of times:
timezone = tz_names.get(tzkeyname + " Standard Time")
# Return what we have.
if timezone is None:
raise pytz.UnknownTimeZoneError('Can not find timezone ' + tzkeyname)
return timezone
def get_localzone():
"""Returns the zoneinfo-based tzinfo object that matches the Windows-configured timezone."""
global _cache_tz
if _cache_tz is None:
_cache_tz = pytz.timezone(get_localzone_name())
return _cache_tz
def reload_localzone():
    """Reload the cached localzone. You need to call this if the timezone has changed."""
    global _cache_tz
    _cache_tz = pytz.timezone(get_localzone_name())
    return _cache_tz

View File

@ -0,0 +1,101 @@
# This file is autogenerated by the get_windows_info.py script
# Do not edit.
tz_names = {'AUS Central Standard Time': 'Australia/Darwin',
'AUS Eastern Standard Time': 'Australia/Sydney',
'Afghanistan Standard Time': 'Asia/Kabul',
'Alaskan Standard Time': 'America/Anchorage',
'Arab Standard Time': 'Asia/Riyadh',
'Arabian Standard Time': 'Asia/Dubai',
'Arabic Standard Time': 'Asia/Baghdad',
'Argentina Standard Time': 'America/Buenos_Aires',
'Atlantic Standard Time': 'America/Halifax',
'Azerbaijan Standard Time': 'Asia/Baku',
'Azores Standard Time': 'Atlantic/Azores',
'Bahia Standard Time': 'America/Bahia',
'Bangladesh Standard Time': 'Asia/Dhaka',
'Canada Central Standard Time': 'America/Regina',
'Cape Verde Standard Time': 'Atlantic/Cape_Verde',
'Caucasus Standard Time': 'Asia/Yerevan',
'Cen. Australia Standard Time': 'Australia/Adelaide',
'Central America Standard Time': 'America/Guatemala',
'Central Asia Standard Time': 'Asia/Almaty',
'Central Brazilian Standard Time': 'America/Cuiaba',
'Central Europe Standard Time': 'Europe/Budapest',
'Central European Standard Time': 'Europe/Warsaw',
'Central Pacific Standard Time': 'Pacific/Guadalcanal',
'Central Standard Time': 'America/Chicago',
'Central Standard Time (Mexico)': 'America/Mexico_City',
'China Standard Time': 'Asia/Shanghai',
'Dateline Standard Time': 'Etc/GMT+12',
'E. Africa Standard Time': 'Africa/Nairobi',
'E. Australia Standard Time': 'Australia/Brisbane',
'E. Europe Standard Time': 'Asia/Nicosia',
'E. South America Standard Time': 'America/Sao_Paulo',
'Eastern Standard Time': 'America/New_York',
'Egypt Standard Time': 'Africa/Cairo',
'Ekaterinburg Standard Time': 'Asia/Yekaterinburg',
'FLE Standard Time': 'Europe/Kiev',
'Fiji Standard Time': 'Pacific/Fiji',
'GMT Standard Time': 'Europe/London',
'GTB Standard Time': 'Europe/Bucharest',
'Georgian Standard Time': 'Asia/Tbilisi',
'Greenland Standard Time': 'America/Godthab',
'Greenwich Standard Time': 'Atlantic/Reykjavik',
'Hawaiian Standard Time': 'Pacific/Honolulu',
'India Standard Time': 'Asia/Calcutta',
'Iran Standard Time': 'Asia/Tehran',
'Israel Standard Time': 'Asia/Jerusalem',
'Jordan Standard Time': 'Asia/Amman',
'Kaliningrad Standard Time': 'Europe/Kaliningrad',
'Korea Standard Time': 'Asia/Seoul',
'Magadan Standard Time': 'Asia/Magadan',
'Mauritius Standard Time': 'Indian/Mauritius',
'Middle East Standard Time': 'Asia/Beirut',
'Montevideo Standard Time': 'America/Montevideo',
'Morocco Standard Time': 'Africa/Casablanca',
'Mountain Standard Time': 'America/Denver',
'Mountain Standard Time (Mexico)': 'America/Chihuahua',
'Myanmar Standard Time': 'Asia/Rangoon',
'N. Central Asia Standard Time': 'Asia/Novosibirsk',
'Namibia Standard Time': 'Africa/Windhoek',
'Nepal Standard Time': 'Asia/Katmandu',
'New Zealand Standard Time': 'Pacific/Auckland',
'Newfoundland Standard Time': 'America/St_Johns',
'North Asia East Standard Time': 'Asia/Irkutsk',
'North Asia Standard Time': 'Asia/Krasnoyarsk',
'Pacific SA Standard Time': 'America/Santiago',
'Pacific Standard Time': 'America/Los_Angeles',
'Pacific Standard Time (Mexico)': 'America/Santa_Isabel',
'Pakistan Standard Time': 'Asia/Karachi',
'Paraguay Standard Time': 'America/Asuncion',
'Romance Standard Time': 'Europe/Paris',
'Russian Standard Time': 'Europe/Moscow',
'SA Eastern Standard Time': 'America/Cayenne',
'SA Pacific Standard Time': 'America/Bogota',
'SA Western Standard Time': 'America/La_Paz',
'SE Asia Standard Time': 'Asia/Bangkok',
'Samoa Standard Time': 'Pacific/Apia',
'Singapore Standard Time': 'Asia/Singapore',
'South Africa Standard Time': 'Africa/Johannesburg',
'Sri Lanka Standard Time': 'Asia/Colombo',
'Syria Standard Time': 'Asia/Damascus',
'Taipei Standard Time': 'Asia/Taipei',
'Tasmania Standard Time': 'Australia/Hobart',
'Tokyo Standard Time': 'Asia/Tokyo',
'Tonga Standard Time': 'Pacific/Tongatapu',
'Turkey Standard Time': 'Europe/Istanbul',
'US Eastern Standard Time': 'America/Indianapolis',
'US Mountain Standard Time': 'America/Phoenix',
'UTC': 'Etc/GMT',
'UTC+12': 'Etc/GMT-12',
'UTC-02': 'Etc/GMT+2',
'UTC-11': 'Etc/GMT+11',
'Ulaanbaatar Standard Time': 'Asia/Ulaanbaatar',
'Venezuela Standard Time': 'America/Caracas',
'Vladivostok Standard Time': 'Asia/Vladivostok',
'W. Australia Standard Time': 'Australia/Perth',
'W. Central Africa Standard Time': 'Africa/Lagos',
'W. Europe Standard Time': 'Europe/Berlin',
'West Asia Standard Time': 'Asia/Tashkent',
'West Pacific Standard Time': 'Pacific/Port_Moresby',
'Yakutsk Standard Time': 'Asia/Yakutsk'}