Aestate
decoder.py
Go to the documentation of this file.
1 """Implementation of JSONDecoder
2 """
3 from __future__ import absolute_import
4 import re
5 import sys
6 import struct
7 from .compat import PY3, unichr
8 from .scanner import make_scanner, JSONDecodeError
9 
10 
12  try:
13  from ._speedups import scanstring
14  return scanstring
15  except ImportError:
16  return None
17 
18 
# Result of the optional C-extension import (a callable, or None on failure)
c_scanstring = _import_c_scanstring()

# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']

# Regex flags applied to every pattern compiled in this module
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

26 
27 
29  if sys.version_info < (2, 6):
30  _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
31  nan, inf = struct.unpack('>dd', _BYTES)
32  else:
33  nan = float('nan')
34  inf = float('inf')
35  return nan, inf, -inf
36 
37 
# Float values for the three non-standard literals handled by the decoder
NaN, PosInf, NegInf = _floatconstants()

# Lookup table from the literal's spelling to its float value
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (possibly empty, non-greedy) run of characters.
# Group 2: the character that ends the run - a double quote, a backslash,
# or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the character it stands for
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding used when a caller passes ``encoding=None``
DEFAULT_ENCODING = "utf-8"
53 
54 
# The only characters allowed in the four-character payload of a \uXXXX
# escape.  int(text, 16) alone is too lenient: it also accepts surrounding
# whitespace, a leading sign, a "0x" prefix and (on newer Pythons) "_".
_HEX_DIGITS = '0123456789abcdefABCDEF'


def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
                  _PY3=PY3, _maxunicode=sys.maxunicode):
    """Scan the string s for a JSON string and return it unescaped.

    ``end`` is the index of the character in ``s`` after the quote that
    started the JSON string.  All valid JSON escape sequences are
    unescaped.  If ``strict`` is false, literal control characters are
    allowed inside the string; otherwise they are an error.

    ``encoding`` (default ``DEFAULT_ENCODING``) is only used on Python 2 to
    convert non-unicode chunks to ``unicode``.

    Returns a tuple of the decoded string and the index of the character in
    ``s`` after the end quote.

    Raises ``JSONDecodeError`` when the string is unterminated, contains a
    literal control character in strict mode, or contains an invalid
    backslash or ``\\uXXXX`` escape.  A ``\\uXXXX`` escape is valid only if
    all four characters are hexadecimal digits.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not _PY3 and not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                # NOTE(review): the "%r" is passed through unformatted;
                # presumably JSONDecodeError fills it in - confirm in scanner
                msg = "Invalid control character %r at"
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\X escape sequence %r"
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            msg = "Invalid \\uXXXX escape sequence"
            esc = s[end + 1:end + 5]
            # Exactly four hex digits are required; this also rejects the
            # "0x.." / "0X.." spellings that int(..., 16) would accept
            if len(esc) != 4 or not all(c in _HEX_DIGITS for c in esc):
                raise JSONDecodeError(msg, s, end - 1)
            uni = int(esc, 16)
            end += 5
            # Check for surrogate pair on UCS-4 systems
            # Note that this will join high/low surrogate pairs
            # but will also pass unpaired surrogates through
            if (_maxunicode > 65535 and
                    uni & 0xfc00 == 0xd800 and
                    s[end:end + 2] == '\\u'):
                esc2 = s[end + 2:end + 6]
                # A truncated second escape is left for the next loop
                # iteration, which reports it at the same position
                if len(esc2) == 4:
                    if not all(c in _HEX_DIGITS for c in esc2):
                        raise JSONDecodeError(msg, s, end)
                    uni2 = int(esc2, 16)
                    if uni2 & 0xfc00 == 0xdc00:
                        uni = 0x10000 + (((uni - 0xd800) << 10) |
                                         (uni2 - 0xdc00))
                        end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return _join(chunks), end
140 
141 
# Use speedup if available: falls back to the pure-Python scanner whenever
# c_scanstring is falsy
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for cheap ``in`` tests
WHITESPACE_STR = ' \t\n\r'
147 
148 
def JSONObject(state, encoding, strict, scan_once, object_hook,
               object_pairs_hook, memo=None,
               _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``state`` is a ``(s, end)`` pair: the document text and the index at
    which scanning starts (presumably just past the opening ``{`` - confirm
    against the scanner).  ``encoding`` and ``strict`` are forwarded to
    ``scanstring`` for each key; ``scan_once`` is called to decode each
    value.

    The key/value pairs are collected in order.  If ``object_pairs_hook``
    is not ``None`` it is called with the list of pairs and its result is
    returned; otherwise the pairs are turned into a ``dict`` which is passed
    through ``object_hook`` when that is not ``None``.  ``memo`` is a dict
    used to reuse one object for equal keys; a fresh dict is created when it
    is omitted.

    The returned ``end`` is the index just past the closing ``}``.

    Raises ``JSONDecodeError`` when a property name, the ``:`` delimiter, or
    the ``,``/``}`` after a value is missing.
    """
    (s, end) = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault(key, key) returns the first-seen object for an equal key
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific error if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end)
    # Step past the opening quote of the first key
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)

        # Step past the ':'
        end += 1

        # Skip whitespace before the value; running off the end of the
        # text is left for scan_once to report
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        value, end = scan_once(s, end)
        pairs.append((key, value))

        # Find the delimiter after the value; '' stands for end of input
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        # Step past the delimiter (error positions below subtract it again)
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)

        # After a comma, skip whitespace up to the next key's opening quote
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        # Step past the opening quote of the next key
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    # object_pairs_hook takes priority over object_hook
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
242 
243 
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``state`` is a ``(s, end)`` pair: the document text and the index at
    which scanning starts (presumably just past the opening ``[`` - confirm
    against the scanner).  ``scan_once`` is called to decode each element.

    Returns the list of decoded values and the index just past the closing
    ``]``.  Raises ``JSONDecodeError`` when the input ends before any value
    or ``]`` is seen, or when an element is not followed by ``,`` or ``]``.
    """
    text, pos = state
    items = []

    lookahead = text[pos:pos + 1]
    if lookahead in _ws:
        pos = _w(text, pos + 1).end()
        lookahead = text[pos:pos + 1]

    # Empty array: nothing to scan
    if lookahead == ']':
        return items, pos + 1
    if lookahead == '':
        raise JSONDecodeError("Expecting value or ']'", text, pos)

    add_item = items.append
    while True:
        item, pos = scan_once(text, pos)
        add_item(item)

        lookahead = text[pos:pos + 1]
        if lookahead in _ws:
            pos = _w(text, pos + 1).end()
            lookahead = text[pos:pos + 1]

        if lookahead == ']':
            return items, pos + 1
        if lookahead != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", text, pos)

        # Step past the comma, then skip whitespace before the next element;
        # running off the end of the text is left for scan_once to report
        pos += 1
        try:
            if text[pos] in _ws:
                pos += 1
                if text[pos] in _ws:
                    pos = _w(text, pos + 1).end()
        except IndexError:
            pass
279 
280 
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default).  It has no effect when decoding unicode objects.

        Note that currently only encodings that are a superset of ASCII
        work; strings of other encodings should be passed in as unicode.

        *object_hook*, if specified, will be called with the result of
        every JSON object decoded and its return value will be used in
        place of the given :class:`dict`.  This can be used to provide
        custom deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called
        with the result of any object literal decoded as an ordered list of
        pairs.  The return value of *object_pairs_hook* will be used
        instead of the :class:`dict`.  This feature can be used to
        implement custom decoders that rely on the order in which the key
        and value pairs are decoded (for example,
        :func:`collections.OrderedDict` will remember the order of
        insertion).  If *object_hook* is also defined, the
        *object_pairs_hook* takes priority.

        *parse_float*, if specified, will be called with the string of
        every JSON float to be decoded.  By default, this is equivalent to
        ``float(num_str)``.  This can be used to use another datatype or
        parser for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded.  By default, this is equivalent to
        ``int(num_str)``.  This can be used to use another datatype or
        parser for JSON integers (e.g. :class:`float`).

        *parse_constant*, if specified, will be called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.
        This can be used to raise an exception if invalid JSON numbers are
        encountered.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string.  The default setting of
        ``True`` means that unescaped control characters are parse errors;
        if ``False`` then control characters will be allowed in strings.

        """
        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Any falsy hook falls back to the built-in converter
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: the scanner is handed this fully configured instance
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        On Python 3 a ``bytes`` input is first decoded with
        ``self.encoding``.  Raises ``JSONDecodeError`` if anything other
        than whitespace follows the document.

        """
        if _PY3 and isinstance(s, bytes):
            s = str(s, self.encoding)
        value, stop = self.raw_decode(s)
        # Only trailing whitespace may follow the document
        stop = _w(s, stop).end()
        if stop == len(s):
            return value
        raise JSONDecodeError("Extra data", s, stop, len(s))

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """
        Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        Optionally, ``idx`` can be used to specify an offset in ``s`` where
        the JSON document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.
        """
        if idx < 0:
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, str):
            raise TypeError("Input string must be text, not bytes")
        start = idx
        # strip UTF-8 bom
        if len(s) > start:
            lead = ord(s[start])
            if lead == 0xfeff:
                start += 1
            elif lead == 0xef and s[start:start + 3] == '\xef\xbb\xbf':
                start += 3
        return self.scan_once(s, idx=_w(s, start).end())
aestate.ajson.sim.decoder.JSONDecoder.object_hook
object_hook
Definition: decoder.py:358
aestate.ajson.sim.decoder.scanstring
def scanstring
Definition: decoder.py:143
aestate.ajson.sim.decoder.JSONDecoder.parse_string
parse_string
Definition: decoder.py:366
aestate.ajson.sim.decoder.JSONDecoder.parse_int
parse_int
Definition: decoder.py:361
aestate.ajson.sim.decoder.JSONDecoder.scan_once
scan_once
Definition: decoder.py:368
aestate.ajson.sim.decoder.JSONDecoder.decode
def decode(self, s, _w=WHITESPACE.match, _PY3=PY3)
Definition: decoder.py:372
aestate.ajson.sim.decoder.JSONDecoder.parse_array
parse_array
Definition: decoder.py:365
aestate.ajson.sim.decoder.JSONObject
def JSONObject(state, encoding, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR)
Definition: decoder.py:149
aestate.ajson.sim.decoder.JSONDecoder.encoding
encoding
Definition: decoder.py:357
aestate.ajson.sim.decoder.JSONArray
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR)
Definition: decoder.py:244
aestate.ajson.sim.decoder.JSONDecoder.object_pairs_hook
object_pairs_hook
Definition: decoder.py:359
aestate.ajson.sim.decoder.JSONDecoder.strict
strict
Definition: decoder.py:363
aestate.ajson.sim.scanner.make_scanner
def make_scanner
Definition: scanner.py:85
JSONDecodeError
static PyObject * JSONDecodeError
Definition: _speedups.c:764
aestate.ajson.sim.decoder.JSONDecoder
Definition: decoder.py:281
aestate.ajson.sim.decoder._floatconstants
def _floatconstants()
Definition: decoder.py:28
aestate.ajson.sim.compat.unichr
unichr
Definition: compat.py:22
aestate.ajson.sim.decoder.JSONDecoder.parse_object
parse_object
Definition: decoder.py:364
aestate.ajson.sim.decoder.JSONDecoder.memo
memo
Definition: decoder.py:367
aestate.ajson.sim.decoder.JSONDecoder.parse_float
parse_float
Definition: decoder.py:360
aestate.ajson.sim.decoder._import_c_scanstring
def _import_c_scanstring()
Definition: decoder.py:11
aestate.ajson.sim.decoder.JSONDecoder.raw_decode
def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3)
Definition: decoder.py:386
aestate.ajson.sim.decoder.JSONDecoder.__init__
def __init__(self, encoding=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None)
Definition: decoder.py:311
aestate.ajson.sim.decoder.JSONDecoder.parse_constant
parse_constant
Definition: decoder.py:362
aestate.ajson.sim.decoder.py_scanstring
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, _PY3=PY3, _maxunicode=sys.maxunicode)
Definition: decoder.py:55