Aestate
_speedups.c
Go to the documentation of this file.
1 /* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */
2 #include "Python.h"
3 #include "structmember.h"
4 
5 #if PY_MAJOR_VERSION >= 3
6 #define PyInt_FromSsize_t PyLong_FromSsize_t
7 #define PyInt_AsSsize_t PyLong_AsSsize_t
8 #define PyInt_Check(obj) 0
9 #define PyInt_CheckExact(obj) 0
10 #define JSON_UNICHR Py_UCS4
11 #define JSON_InternFromString PyUnicode_InternFromString
12 #define PyString_GET_SIZE PyUnicode_GET_LENGTH
13 #define PY2_UNUSED
14 #define PY3_UNUSED UNUSED
15 #else /* PY_MAJOR_VERSION >= 3 */
16 #define PY2_UNUSED UNUSED
17 #define PY3_UNUSED
18 #define PyBytes_Check PyString_Check
19 #define PyUnicode_READY(obj) 0
20 #define PyUnicode_KIND(obj) (sizeof(Py_UNICODE))
21 #define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj)))
22 #define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)])
23 #define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE
24 #define JSON_UNICHR Py_UNICODE
25 #define JSON_InternFromString PyString_InternFromString
26 #endif /* PY_MAJOR_VERSION < 3 */
27 
28 #if PY_VERSION_HEX < 0x02070000
29 #if !defined(PyOS_string_to_double)
30 #define PyOS_string_to_double json_PyOS_string_to_double
31 static double
32 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
33 static double
34 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
35 {
36  double x;
37  assert(endptr == NULL);
38  assert(overflow_exception == NULL);
39  PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
40  x = PyOS_ascii_atof(s);
41  PyFPE_END_PROTECT(x)
42  return x;
43 }
44 #endif
45 #endif /* PY_VERSION_HEX < 0x02070000 */
46 
47 #if PY_VERSION_HEX < 0x02060000
48 #if !defined(Py_TYPE)
49 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
50 #endif
51 #if !defined(Py_SIZE)
52 #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
53 #endif
54 #if !defined(PyVarObject_HEAD_INIT)
55 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
56 #endif
57 #endif /* PY_VERSION_HEX < 0x02060000 */
58 
59 #ifdef __GNUC__
60 #define UNUSED __attribute__((__unused__))
61 #else
62 #define UNUSED
63 #endif
64 
65 #define DEFAULT_ENCODING "utf-8"
66 
67 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
68 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
69 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
70 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
71 
72 #define JSON_ALLOW_NAN 1
73 #define JSON_IGNORE_NAN 2
74 
75 static PyObject *JSON_Infinity = NULL;
76 static PyObject *JSON_NegInfinity = NULL;
77 static PyObject *JSON_NaN = NULL;
78 static PyObject *JSON_EmptyUnicode = NULL;
79 #if PY_MAJOR_VERSION < 3
80 static PyObject *JSON_EmptyStr = NULL;
81 #endif
82 
83 static PyTypeObject PyScannerType;
84 static PyTypeObject PyEncoderType;
85 
86 typedef struct {
87  PyObject *large_strings; /* A list of previously accumulated large strings */
88  PyObject *small_strings; /* Pending small strings */
89 } JSON_Accu;
90 
91 static int
93 static int
94 JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode);
95 static PyObject *
97 static void
99 
100 #define ERR_EXPECTING_VALUE "Expecting value"
101 #define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'"
102 #define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'"
103 #define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'"
104 #define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes"
105 #define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'"
106 #define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter"
107 #define ERR_STRING_UNTERMINATED "Unterminated string starting at"
108 #define ERR_STRING_CONTROL "Invalid control character %r at"
109 #define ERR_STRING_ESC1 "Invalid \\X escape sequence %r"
110 #define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence"
111 
112 typedef struct _PyScannerObject {
113  PyObject_HEAD
114  PyObject *encoding;
115  PyObject *strict_bool;
116  int strict;
117  PyObject *object_hook;
118  PyObject *pairs_hook;
119  PyObject *parse_float;
120  PyObject *parse_int;
121  PyObject *parse_constant;
122  PyObject *memo;
124 
125 static PyMemberDef scanner_members[] = {
126  {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
127  {"strict", T_OBJECT, offsetof(PyScannerObject, strict_bool), READONLY, "strict"},
128  {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
129  {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
130  {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
131  {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
132  {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
133  {NULL}
134 };
135 
136 typedef struct _PyEncoderObject {
137  PyObject_HEAD
138  PyObject *markers;
139  PyObject *defaultfn;
140  PyObject *encoder;
141  PyObject *indent;
142  PyObject *key_separator;
143  PyObject *item_separator;
144  PyObject *sort_keys;
145  PyObject *key_memo;
146  PyObject *encoding;
147  PyObject *Decimal;
148  PyObject *skipkeys_bool;
149  int skipkeys;
151  /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */
157  PyObject *max_long_size;
158  PyObject *min_long_size;
159  PyObject *item_sort_key;
160  PyObject *item_sort_kw;
161  int for_json;
163 
164 static PyMemberDef encoder_members[] = {
165  {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
166  {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
167  {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
168  {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"},
169  {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
170  {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
171  {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
172  {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
173  /* Python 2.5 does not support T_BOOl */
174  {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
175  {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
176  {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
177  {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"},
178  {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"},
179  {NULL}
180 };
181 
182 static PyObject *
183 join_list_unicode(PyObject *lst);
184 static PyObject *
185 JSON_ParseEncoding(PyObject *encoding);
186 static PyObject *
187 maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj);
188 static Py_ssize_t
190 static Py_ssize_t
191 ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars);
192 static PyObject *
193 ascii_escape_unicode(PyObject *pystr);
194 static PyObject *
195 ascii_escape_str(PyObject *pystr);
196 static PyObject *
197 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
198 #if PY_MAJOR_VERSION < 3
199 static PyObject *
200 join_list_string(PyObject *lst);
201 static PyObject *
202 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
203 static PyObject *
204 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
205 static PyObject *
206 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
207 #endif
208 static PyObject *
209 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
210 static PyObject *
211 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
212 static PyObject *
213 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
214 static PyObject *
215 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
216 static void
217 scanner_dealloc(PyObject *self);
218 static int
219 scanner_clear(PyObject *self);
220 static PyObject *
221 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
222 static void
223 encoder_dealloc(PyObject *self);
224 static int
225 encoder_clear(PyObject *self);
226 static int
227 is_raw_json(PyObject *obj);
228 static PyObject *
229 encoder_stringify_key(PyEncoderObject *s, PyObject *key);
230 static int
231 encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level);
232 static int
233 encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level);
234 static int
235 encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level);
236 static PyObject *
237 _encoded_const(PyObject *obj);
238 static void
239 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
240 static PyObject *
241 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
242 static int
243 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
244 static PyObject *
245 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
246 static PyObject *
247 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
248 static int
249 _is_namedtuple(PyObject *obj);
250 static int
251 _has_for_json_hook(PyObject *obj);
252 static PyObject *
253 moduleinit(void);
254 
/* True for printable ASCII (' ' through '~') other than backslash and
   double quote. The argument is parenthesized so that expression
   arguments (e.g. `x & 0x7f`) bind correctly; it is still evaluated
   more than once, so do not pass expressions with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
256 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
257 
258 #define MIN_EXPANSION 6
259 
260 static PyObject* RawJSONType = NULL;
261 static int
262 is_raw_json(PyObject *obj)
263 {
264  return PyObject_IsInstance(obj, RawJSONType) ? 1 : 0;
265 }
266 
267 static int
269 {
270  /* Lazily allocated */
271  acc->large_strings = NULL;
272  acc->small_strings = PyList_New(0);
273  if (acc->small_strings == NULL)
274  return -1;
275  return 0;
276 }
277 
278 static int
280 {
281  Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings);
282  if (nsmall) {
283  int ret;
284  PyObject *joined;
285  if (acc->large_strings == NULL) {
286  acc->large_strings = PyList_New(0);
287  if (acc->large_strings == NULL)
288  return -1;
289  }
290 #if PY_MAJOR_VERSION >= 3
291  joined = join_list_unicode(acc->small_strings);
292 #else /* PY_MAJOR_VERSION >= 3 */
293  joined = join_list_string(acc->small_strings);
294 #endif /* PY_MAJOR_VERSION < 3 */
295  if (joined == NULL)
296  return -1;
297  if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) {
298  Py_DECREF(joined);
299  return -1;
300  }
301  ret = PyList_Append(acc->large_strings, joined);
302  Py_DECREF(joined);
303  return ret;
304  }
305  return 0;
306 }
307 
308 static int
309 JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode)
310 {
311  Py_ssize_t nsmall;
312 #if PY_MAJOR_VERSION >= 3
313  assert(PyUnicode_Check(unicode));
314 #else /* PY_MAJOR_VERSION >= 3 */
315  assert(PyString_Check(unicode) || PyUnicode_Check(unicode));
316 #endif /* PY_MAJOR_VERSION < 3 */
317 
318  if (PyList_Append(acc->small_strings, unicode))
319  return -1;
320  nsmall = PyList_GET_SIZE(acc->small_strings);
321  /* Each item in a list of unicode objects has an overhead (in 64-bit
322  * builds) of:
323  * - 8 bytes for the list slot
324  * - 56 bytes for the header of the unicode object
325  * that is, 64 bytes. 100000 such objects waste more than 6MB
326  * compared to a single concatenated string.
327  */
328  if (nsmall < 100000)
329  return 0;
330  return flush_accumulator(acc);
331 }
332 
333 static PyObject *
335 {
336  int ret;
337  PyObject *res;
338 
339  ret = flush_accumulator(acc);
340  Py_CLEAR(acc->small_strings);
341  if (ret) {
342  Py_CLEAR(acc->large_strings);
343  return NULL;
344  }
345  res = acc->large_strings;
346  acc->large_strings = NULL;
347  if (res == NULL)
348  return PyList_New(0);
349  return res;
350 }
351 
352 static void
354 {
355  Py_CLEAR(acc->small_strings);
356  Py_CLEAR(acc->large_strings);
357 }
358 
359 static int
361 {
362  return c >= '0' && c <= '9';
363 }
364 
365 static PyObject *
366 maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj)
367 {
368  if (s->max_long_size != Py_None && s->min_long_size != Py_None) {
369  if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) ||
370  PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) {
371 #if PY_MAJOR_VERSION >= 3
372  PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded);
373 #else
374  PyObject* quoted = PyString_FromFormat("\"%s\"",
375  PyString_AsString(encoded));
376 #endif
377  Py_DECREF(encoded);
378  encoded = quoted;
379  }
380  }
381 
382  return encoded;
383 }
384 
385 static int
386 _is_namedtuple(PyObject *obj)
387 {
388  int rval = 0;
389  PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict");
390  if (_asdict == NULL) {
391  PyErr_Clear();
392  return 0;
393  }
394  rval = PyCallable_Check(_asdict);
395  Py_DECREF(_asdict);
396  return rval;
397 }
398 
399 static int
400 _has_for_json_hook(PyObject *obj)
401 {
402  int rval = 0;
403  PyObject *for_json = PyObject_GetAttrString(obj, "for_json");
404  if (for_json == NULL) {
405  PyErr_Clear();
406  return 0;
407  }
408  rval = PyCallable_Check(for_json);
409  Py_DECREF(for_json);
410  return rval;
411 }
412 
413 static int
414 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
415 {
416  /* PyObject to Py_ssize_t converter */
417  *size_ptr = PyInt_AsSsize_t(o);
418  if (*size_ptr == -1 && PyErr_Occurred())
419  return 0;
420  return 1;
421 }
422 
423 static PyObject *
424 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
425 {
426  /* Py_ssize_t to PyObject converter */
427  return PyInt_FromSsize_t(*size_ptr);
428 }
429 
430 static Py_ssize_t
431 ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars)
432 {
433  /* Escape unicode code point c to ASCII escape sequences
434  in char *output. output must have at least 12 bytes unused to
435  accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
436  if (S_CHAR(c)) {
437  output[chars++] = (char)c;
438  }
439  else {
440  output[chars++] = '\\';
441  switch (c) {
442  case '\\': output[chars++] = (char)c; break;
443  case '"': output[chars++] = (char)c; break;
444  case '\b': output[chars++] = 'b'; break;
445  case '\f': output[chars++] = 'f'; break;
446  case '\n': output[chars++] = 'n'; break;
447  case '\r': output[chars++] = 'r'; break;
448  case '\t': output[chars++] = 't'; break;
449  default:
450 #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
451  if (c >= 0x10000) {
452  /* UTF-16 surrogate pair */
453  JSON_UNICHR v = c - 0x10000;
454  c = 0xd800 | ((v >> 10) & 0x3ff);
455  output[chars++] = 'u';
456  output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
457  output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
458  output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
459  output[chars++] = "0123456789abcdef"[(c ) & 0xf];
460  c = 0xdc00 | (v & 0x3ff);
461  output[chars++] = '\\';
462  }
463 #endif
464  output[chars++] = 'u';
465  output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
466  output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
467  output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
468  output[chars++] = "0123456789abcdef"[(c ) & 0xf];
469  }
470  }
471  return chars;
472 }
473 
474 static Py_ssize_t
476 {
477  if (S_CHAR(c)) {
478  return 1;
479  }
480  else if (c == '\\' ||
481  c == '"' ||
482  c == '\b' ||
483  c == '\f' ||
484  c == '\n' ||
485  c == '\r' ||
486  c == '\t') {
487  return 2;
488  }
489 #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
490  else if (c >= 0x10000U) {
491  return 2 * MIN_EXPANSION;
492  }
493 #endif
494  else {
495  return MIN_EXPANSION;
496  }
497 }
498 
499 static PyObject *
500 ascii_escape_unicode(PyObject *pystr)
501 {
502  /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
503  Py_ssize_t i;
504  Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
505  Py_ssize_t output_size = 2;
506  Py_ssize_t chars;
507  PY2_UNUSED int kind = PyUnicode_KIND(pystr);
508  void *data = PyUnicode_DATA(pystr);
509  PyObject *rval;
510  char *output;
511 
512  output_size = 2;
513  for (i = 0; i < input_chars; i++) {
514  output_size += ascii_char_size(PyUnicode_READ(kind, data, i));
515  }
516 #if PY_MAJOR_VERSION >= 3
517  rval = PyUnicode_New(output_size, 127);
518  if (rval == NULL) {
519  return NULL;
520  }
521  assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND);
522  output = (char *)PyUnicode_DATA(rval);
523 #else
524  rval = PyString_FromStringAndSize(NULL, output_size);
525  if (rval == NULL) {
526  return NULL;
527  }
528  output = PyString_AS_STRING(rval);
529 #endif
530  chars = 0;
531  output[chars++] = '"';
532  for (i = 0; i < input_chars; i++) {
533  chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars);
534  }
535  output[chars++] = '"';
536  assert(chars == output_size);
537  return rval;
538 }
539 
540 #if PY_MAJOR_VERSION >= 3
541 
542 static PyObject *
543 ascii_escape_str(PyObject *pystr)
544 {
545  PyObject *rval;
546  PyObject *input = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(pystr), PyBytes_GET_SIZE(pystr), NULL);
547  if (input == NULL)
548  return NULL;
549  rval = ascii_escape_unicode(input);
550  Py_DECREF(input);
551  return rval;
552 }
553 
554 #else /* PY_MAJOR_VERSION >= 3 */
555 
556 static PyObject *
557 ascii_escape_str(PyObject *pystr)
558 {
559  /* Take a PyString pystr and return a new ASCII-only escaped PyString */
560  Py_ssize_t i;
561  Py_ssize_t input_chars;
562  Py_ssize_t output_size;
563  Py_ssize_t chars;
564  PyObject *rval;
565  char *output;
566  char *input_str;
567 
568  input_chars = PyString_GET_SIZE(pystr);
569  input_str = PyString_AS_STRING(pystr);
570  output_size = 2;
571 
572  /* Fast path for a string that's already ASCII */
573  for (i = 0; i < input_chars; i++) {
574  JSON_UNICHR c = (JSON_UNICHR)input_str[i];
575  if (c > 0x7f) {
576  /* We hit a non-ASCII character, bail to unicode mode */
577  PyObject *uni;
578  uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
579  if (uni == NULL) {
580  return NULL;
581  }
582  rval = ascii_escape_unicode(uni);
583  Py_DECREF(uni);
584  return rval;
585  }
586  output_size += ascii_char_size(c);
587  }
588 
589  rval = PyString_FromStringAndSize(NULL, output_size);
590  if (rval == NULL) {
591  return NULL;
592  }
593  chars = 0;
594  output = PyString_AS_STRING(rval);
595  output[chars++] = '"';
596  for (i = 0; i < input_chars; i++) {
597  chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars);
598  }
599  output[chars++] = '"';
600  assert(chars == output_size);
601  return rval;
602 }
603 #endif /* PY_MAJOR_VERSION < 3 */
604 
605 static PyObject *
607 {
608  if (PyUnicode_Check(key)) {
609  Py_INCREF(key);
610  return key;
611  }
612 #if PY_MAJOR_VERSION >= 3
613  else if (PyBytes_Check(key) && s->encoding != NULL) {
614  const char *encoding = PyUnicode_AsUTF8(s->encoding);
615  if (encoding == NULL)
616  return NULL;
617  return PyUnicode_Decode(
618  PyBytes_AS_STRING(key),
619  PyBytes_GET_SIZE(key),
620  encoding,
621  NULL);
622  }
623 #else /* PY_MAJOR_VERSION >= 3 */
624  else if (PyString_Check(key)) {
625  Py_INCREF(key);
626  return key;
627  }
628 #endif /* PY_MAJOR_VERSION < 3 */
629  else if (PyFloat_Check(key)) {
630  return encoder_encode_float(s, key);
631  }
632  else if (key == Py_True || key == Py_False || key == Py_None) {
633  /* This must come before the PyInt_Check because
634  True and False are also 1 and 0.*/
635  return _encoded_const(key);
636  }
637  else if (PyInt_Check(key) || PyLong_Check(key)) {
638  if (!(PyInt_CheckExact(key) || PyLong_CheckExact(key))) {
639  /* See #118, do not trust custom str/repr */
640  PyObject *res;
641  PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, key, NULL);
642  if (tmp == NULL) {
643  return NULL;
644  }
645  res = PyObject_Str(tmp);
646  Py_DECREF(tmp);
647  return res;
648  }
649  else {
650  return PyObject_Str(key);
651  }
652  }
653  else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) {
654  return PyObject_Str(key);
655  }
656  if (s->skipkeys) {
657  Py_INCREF(Py_None);
658  return Py_None;
659  }
660  PyErr_Format(PyExc_TypeError,
661  "keys must be str, int, float, bool or None, "
662  "not %.100s", key->ob_type->tp_name);
663  return NULL;
664 }
665 
666 static PyObject *
668 {
669  PyObject *items;
670  PyObject *iter = NULL;
671  PyObject *lst = NULL;
672  PyObject *item = NULL;
673  PyObject *kstr = NULL;
674  PyObject *sortfun = NULL;
675  PyObject *sortres;
676  static PyObject *sortargs = NULL;
677 
678  if (sortargs == NULL) {
679  sortargs = PyTuple_New(0);
680  if (sortargs == NULL)
681  return NULL;
682  }
683 
684  if (PyDict_CheckExact(dct))
685  items = PyDict_Items(dct);
686  else
687  items = PyMapping_Items(dct);
688  if (items == NULL)
689  return NULL;
690  iter = PyObject_GetIter(items);
691  Py_DECREF(items);
692  if (iter == NULL)
693  return NULL;
694  if (s->item_sort_kw == Py_None)
695  return iter;
696  lst = PyList_New(0);
697  if (lst == NULL)
698  goto bail;
699  while ((item = PyIter_Next(iter))) {
700  PyObject *key, *value;
701  if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
702  PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
703  goto bail;
704  }
705  key = PyTuple_GET_ITEM(item, 0);
706  if (key == NULL)
707  goto bail;
708 #if PY_MAJOR_VERSION < 3
709  else if (PyString_Check(key)) {
710  /* item can be added as-is */
711  }
712 #endif /* PY_MAJOR_VERSION < 3 */
713  else if (PyUnicode_Check(key)) {
714  /* item can be added as-is */
715  }
716  else {
717  PyObject *tpl;
718  kstr = encoder_stringify_key(s, key);
719  if (kstr == NULL)
720  goto bail;
721  else if (kstr == Py_None) {
722  /* skipkeys */
723  Py_DECREF(kstr);
724  continue;
725  }
726  value = PyTuple_GET_ITEM(item, 1);
727  if (value == NULL)
728  goto bail;
729  tpl = PyTuple_Pack(2, kstr, value);
730  if (tpl == NULL)
731  goto bail;
732  Py_CLEAR(kstr);
733  Py_DECREF(item);
734  item = tpl;
735  }
736  if (PyList_Append(lst, item))
737  goto bail;
738  Py_DECREF(item);
739  }
740  Py_CLEAR(iter);
741  if (PyErr_Occurred())
742  goto bail;
743  sortfun = PyObject_GetAttrString(lst, "sort");
744  if (sortfun == NULL)
745  goto bail;
746  sortres = PyObject_Call(sortfun, sortargs, s->item_sort_kw);
747  if (!sortres)
748  goto bail;
749  Py_DECREF(sortres);
750  Py_CLEAR(sortfun);
751  iter = PyObject_GetIter(lst);
752  Py_CLEAR(lst);
753  return iter;
754 bail:
755  Py_XDECREF(sortfun);
756  Py_XDECREF(kstr);
757  Py_XDECREF(item);
758  Py_XDECREF(lst);
759  Py_XDECREF(iter);
760  return NULL;
761 }
762 
763 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
764 static PyObject *JSONDecodeError = NULL;
765 static void
766 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
767 {
768  PyObject *exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
769  if (exc) {
770  PyErr_SetObject(JSONDecodeError, exc);
771  Py_DECREF(exc);
772  }
773 }
774 
775 static PyObject *
776 join_list_unicode(PyObject *lst)
777 {
778  /* return u''.join(lst) */
779  return PyUnicode_Join(JSON_EmptyUnicode, lst);
780 }
781 
782 #if PY_MAJOR_VERSION >= 3
783 #define join_list_string join_list_unicode
784 #else /* PY_MAJOR_VERSION >= 3 */
785 static PyObject *
786 join_list_string(PyObject *lst)
787 {
788  /* return ''.join(lst) */
789  static PyObject *joinfn = NULL;
790  if (joinfn == NULL) {
791  joinfn = PyObject_GetAttrString(JSON_EmptyStr, "join");
792  if (joinfn == NULL)
793  return NULL;
794  }
795  return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
796 }
797 #endif /* PY_MAJOR_VERSION < 3 */
798 
799 static PyObject *
800 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
801 {
802  /* return (rval, idx) tuple, stealing reference to rval */
803  PyObject *tpl;
804  PyObject *pyidx;
805  /*
806  steal a reference to rval, returns (rval, idx)
807  */
808  if (rval == NULL) {
809  assert(PyErr_Occurred());
810  return NULL;
811  }
812  pyidx = PyInt_FromSsize_t(idx);
813  if (pyidx == NULL) {
814  Py_DECREF(rval);
815  return NULL;
816  }
817  tpl = PyTuple_New(2);
818  if (tpl == NULL) {
819  Py_DECREF(pyidx);
820  Py_DECREF(rval);
821  return NULL;
822  }
823  PyTuple_SET_ITEM(tpl, 0, rval);
824  PyTuple_SET_ITEM(tpl, 1, pyidx);
825  return tpl;
826 }
827 
/* Move a pending `chunk` (if any) onto the `chunks` list, creating the
   list on first use, then reset `chunk` to NULL. Expects locals named
   `chunk` and `chunks` and a `bail` label in the enclosing function. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        if (PyList_Append(chunks, chunk) != 0) { \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
841 
842 #if PY_MAJOR_VERSION < 3
843 static PyObject *
844 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
845 {
846  /* Read the JSON string from PyString pystr.
847  end is the index of the first character after the quote.
848  encoding is the encoding of pystr (must be an ASCII superset)
849  if strict is zero then literal control characters are allowed
850  *next_end_ptr is a return-by-reference index of the character
851  after the end quote
852 
853  Return value is a new PyString (if ASCII-only) or PyUnicode
854  */
855  PyObject *rval;
856  Py_ssize_t len = PyString_GET_SIZE(pystr);
857  Py_ssize_t begin = end - 1;
858  Py_ssize_t next = begin;
859  int has_unicode = 0;
860  char *buf = PyString_AS_STRING(pystr);
861  PyObject *chunks = NULL;
862  PyObject *chunk = NULL;
863  PyObject *strchunk = NULL;
864 
865  if (len == end) {
866  raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
867  goto bail;
868  }
869  else if (end < 0 || len < end) {
870  PyErr_SetString(PyExc_ValueError, "end is out of bounds");
871  goto bail;
872  }
873  while (1) {
874  /* Find the end of the string or the next escape */
875  Py_UNICODE c = 0;
876  for (next = end; next < len; next++) {
877  c = (unsigned char)buf[next];
878  if (c == '"' || c == '\\') {
879  break;
880  }
881  else if (strict && c <= 0x1f) {
882  raise_errmsg(ERR_STRING_CONTROL, pystr, next);
883  goto bail;
884  }
885  else if (c > 0x7f) {
886  has_unicode = 1;
887  }
888  }
889  if (!(c == '"' || c == '\\')) {
890  raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
891  goto bail;
892  }
893  /* Pick up this chunk if it's not zero length */
894  if (next != end) {
896  strchunk = PyString_FromStringAndSize(&buf[end], next - end);
897  if (strchunk == NULL) {
898  goto bail;
899  }
900  if (has_unicode) {
901  chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
902  Py_DECREF(strchunk);
903  if (chunk == NULL) {
904  goto bail;
905  }
906  }
907  else {
908  chunk = strchunk;
909  }
910  }
911  next++;
912  if (c == '"') {
913  end = next;
914  break;
915  }
916  if (next == len) {
917  raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
918  goto bail;
919  }
920  c = buf[next];
921  if (c != 'u') {
922  /* Non-unicode backslash escapes */
923  end = next + 1;
924  switch (c) {
925  case '"': break;
926  case '\\': break;
927  case '/': break;
928  case 'b': c = '\b'; break;
929  case 'f': c = '\f'; break;
930  case 'n': c = '\n'; break;
931  case 'r': c = '\r'; break;
932  case 't': c = '\t'; break;
933  default: c = 0;
934  }
935  if (c == 0) {
936  raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
937  goto bail;
938  }
939  }
940  else {
941  c = 0;
942  next++;
943  end = next + 4;
944  if (end >= len) {
945  raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
946  goto bail;
947  }
948  /* Decode 4 hex digits */
949  for (; next < end; next++) {
950  JSON_UNICHR digit = (JSON_UNICHR)buf[next];
951  c <<= 4;
952  switch (digit) {
953  case '0': case '1': case '2': case '3': case '4':
954  case '5': case '6': case '7': case '8': case '9':
955  c |= (digit - '0'); break;
956  case 'a': case 'b': case 'c': case 'd': case 'e':
957  case 'f':
958  c |= (digit - 'a' + 10); break;
959  case 'A': case 'B': case 'C': case 'D': case 'E':
960  case 'F':
961  c |= (digit - 'A' + 10); break;
962  default:
963  raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
964  goto bail;
965  }
966  }
967 #if defined(Py_UNICODE_WIDE)
968  /* Surrogate pair */
969  if ((c & 0xfc00) == 0xd800) {
970  if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') {
971  JSON_UNICHR c2 = 0;
972  end += 6;
973  /* Decode 4 hex digits */
974  for (next += 2; next < end; next++) {
975  c2 <<= 4;
976  JSON_UNICHR digit = buf[next];
977  switch (digit) {
978  case '0': case '1': case '2': case '3': case '4':
979  case '5': case '6': case '7': case '8': case '9':
980  c2 |= (digit - '0'); break;
981  case 'a': case 'b': case 'c': case 'd': case 'e':
982  case 'f':
983  c2 |= (digit - 'a' + 10); break;
984  case 'A': case 'B': case 'C': case 'D': case 'E':
985  case 'F':
986  c2 |= (digit - 'A' + 10); break;
987  default:
988  raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
989  goto bail;
990  }
991  }
992  if ((c2 & 0xfc00) != 0xdc00) {
993  /* not a low surrogate, rewind */
994  end -= 6;
995  next = end;
996  }
997  else {
998  c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
999  }
1000  }
1001  }
1002 #endif /* Py_UNICODE_WIDE */
1003  }
1004  if (c > 0x7f) {
1005  has_unicode = 1;
1006  }
1008  if (has_unicode) {
1009  chunk = PyUnicode_FromOrdinal(c);
1010  if (chunk == NULL) {
1011  goto bail;
1012  }
1013  }
1014  else {
1015  char c_char = Py_CHARMASK(c);
1016  chunk = PyString_FromStringAndSize(&c_char, 1);
1017  if (chunk == NULL) {
1018  goto bail;
1019  }
1020  }
1021  }
1022 
1023  if (chunks == NULL) {
1024  if (chunk != NULL)
1025  rval = chunk;
1026  else {
1027  rval = JSON_EmptyStr;
1028  Py_INCREF(rval);
1029  }
1030  }
1031  else {
1033  rval = join_list_string(chunks);
1034  if (rval == NULL) {
1035  goto bail;
1036  }
1037  Py_CLEAR(chunks);
1038  }
1039 
1040  *next_end_ptr = end;
1041  return rval;
1042 bail:
1043  *next_end_ptr = -1;
1044  Py_XDECREF(chunk);
1045  Py_XDECREF(chunks);
1046  return NULL;
1047 }
1048 #endif /* PY_MAJOR_VERSION < 3 */
1049 
1050 static PyObject *
1051 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
1052 {
1053  /* Read the JSON string from PyUnicode pystr.
1054  end is the index of the first character after the quote.
1055  if strict is zero then literal control characters are allowed
1056  *next_end_ptr is a return-by-reference index of the character
1057  after the end quote
1058 
1059  Return value is a new PyUnicode
1060  */
1061  PyObject *rval;
1062  Py_ssize_t begin = end - 1;
1063  Py_ssize_t next = begin;
1064  PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1065  Py_ssize_t len = PyUnicode_GET_LENGTH(pystr);
1066  void *buf = PyUnicode_DATA(pystr);
1067  PyObject *chunks = NULL;
1068  PyObject *chunk = NULL;
1069 
1070  if (len == end) {
1071  raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
1072  goto bail;
1073  }
1074  else if (end < 0 || len < end) {
1075  PyErr_SetString(PyExc_ValueError, "end is out of bounds");
1076  goto bail;
1077  }
1078  while (1) {
1079  /* Find the end of the string or the next escape */
1080  JSON_UNICHR c = 0;
1081  for (next = end; next < len; next++) {
1082  c = PyUnicode_READ(kind, buf, next);
1083  if (c == '"' || c == '\\') {
1084  break;
1085  }
1086  else if (strict && c <= 0x1f) {
1087  raise_errmsg(ERR_STRING_CONTROL, pystr, next);
1088  goto bail;
1089  }
1090  }
1091  if (!(c == '"' || c == '\\')) {
1092  raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
1093  goto bail;
1094  }
1095  /* Pick up this chunk if it's not zero length */
1096  if (next != end) {
1098 #if PY_MAJOR_VERSION < 3
1099  chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end);
1100 #else
1101  chunk = PyUnicode_Substring(pystr, end, next);
1102 #endif
1103  if (chunk == NULL) {
1104  goto bail;
1105  }
1106  }
1107  next++;
1108  if (c == '"') {
1109  end = next;
1110  break;
1111  }
1112  if (next == len) {
1113  raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
1114  goto bail;
1115  }
1116  c = PyUnicode_READ(kind, buf, next);
1117  if (c != 'u') {
1118  /* Non-unicode backslash escapes */
1119  end = next + 1;
1120  switch (c) {
1121  case '"': break;
1122  case '\\': break;
1123  case '/': break;
1124  case 'b': c = '\b'; break;
1125  case 'f': c = '\f'; break;
1126  case 'n': c = '\n'; break;
1127  case 'r': c = '\r'; break;
1128  case 't': c = '\t'; break;
1129  default: c = 0;
1130  }
1131  if (c == 0) {
1132  raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
1133  goto bail;
1134  }
1135  }
1136  else {
1137  c = 0;
1138  next++;
1139  end = next + 4;
1140  if (end >= len) {
1141  raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
1142  goto bail;
1143  }
1144  /* Decode 4 hex digits */
1145  for (; next < end; next++) {
1146  JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
1147  c <<= 4;
1148  switch (digit) {
1149  case '0': case '1': case '2': case '3': case '4':
1150  case '5': case '6': case '7': case '8': case '9':
1151  c |= (digit - '0'); break;
1152  case 'a': case 'b': case 'c': case 'd': case 'e':
1153  case 'f':
1154  c |= (digit - 'a' + 10); break;
1155  case 'A': case 'B': case 'C': case 'D': case 'E':
1156  case 'F':
1157  c |= (digit - 'A' + 10); break;
1158  default:
1159  raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
1160  goto bail;
1161  }
1162  }
1163 #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
1164  /* Surrogate pair */
1165  if ((c & 0xfc00) == 0xd800) {
1166  JSON_UNICHR c2 = 0;
1167  if (end + 6 < len &&
1168  PyUnicode_READ(kind, buf, next) == '\\' &&
1169  PyUnicode_READ(kind, buf, next + 1) == 'u') {
1170  end += 6;
1171  /* Decode 4 hex digits */
1172  for (next += 2; next < end; next++) {
1173  JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
1174  c2 <<= 4;
1175  switch (digit) {
1176  case '0': case '1': case '2': case '3': case '4':
1177  case '5': case '6': case '7': case '8': case '9':
1178  c2 |= (digit - '0'); break;
1179  case 'a': case 'b': case 'c': case 'd': case 'e':
1180  case 'f':
1181  c2 |= (digit - 'a' + 10); break;
1182  case 'A': case 'B': case 'C': case 'D': case 'E':
1183  case 'F':
1184  c2 |= (digit - 'A' + 10); break;
1185  default:
1186  raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
1187  goto bail;
1188  }
1189  }
1190  if ((c2 & 0xfc00) != 0xdc00) {
1191  /* not a low surrogate, rewind */
1192  end -= 6;
1193  next = end;
1194  }
1195  else {
1196  c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
1197  }
1198  }
1199  }
1200 #endif
1201  }
1203  chunk = PyUnicode_FromOrdinal(c);
1204  if (chunk == NULL) {
1205  goto bail;
1206  }
1207  }
1208 
1209  if (chunks == NULL) {
1210  if (chunk != NULL)
1211  rval = chunk;
1212  else {
1213  rval = JSON_EmptyUnicode;
1214  Py_INCREF(rval);
1215  }
1216  }
1217  else {
1219  rval = join_list_unicode(chunks);
1220  if (rval == NULL) {
1221  goto bail;
1222  }
1223  Py_CLEAR(chunks);
1224  }
1225  *next_end_ptr = end;
1226  return rval;
1227 bail:
1228  *next_end_ptr = -1;
1229  Py_XDECREF(chunk);
1230  Py_XDECREF(chunks);
1231  return NULL;
1232 }
1233 
1234 PyDoc_STRVAR(pydoc_scanstring,
1235  "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
1236  "\n"
1237  "Scan the string s for a JSON string. End is the index of the\n"
1238  "character in s after the quote that started the JSON string.\n"
1239  "Unescapes all valid JSON string escape sequences and raises ValueError\n"
1240  "on attempt to decode an invalid string. If strict is False then literal\n"
1241  "control characters are allowed in the string.\n"
1242  "\n"
1243  "Returns a tuple of the decoded string and the index of the character in s\n"
1244  "after the end quote."
1245 );
1246 
1247 static PyObject *
1248 py_scanstring(PyObject* self UNUSED, PyObject *args)
1249 {
1250  PyObject *pystr;
1251  PyObject *rval;
1252  Py_ssize_t end;
1253  Py_ssize_t next_end = -1;
1254  char *encoding = NULL;
1255  int strict = 1;
1256  if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
1257  return NULL;
1258  }
1259  if (encoding == NULL) {
1260  encoding = DEFAULT_ENCODING;
1261  }
1262  if (PyUnicode_Check(pystr)) {
1263  if (PyUnicode_READY(pystr))
1264  return NULL;
1265  rval = scanstring_unicode(pystr, end, strict, &next_end);
1266  }
1267 #if PY_MAJOR_VERSION < 3
1268  /* Using a bytes input is unsupported for scanning in Python 3.
1269  It is coerced to str in the decoder before it gets here. */
1270  else if (PyString_Check(pystr)) {
1271  rval = scanstring_str(pystr, end, encoding, strict, &next_end);
1272  }
1273 #endif
1274  else {
1275  PyErr_Format(PyExc_TypeError,
1276  "first argument must be a string, not %.80s",
1277  Py_TYPE(pystr)->tp_name);
1278  return NULL;
1279  }
1280  return _build_rval_index_tuple(rval, next_end);
1281 }
1282 
1283 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
1284  "encode_basestring_ascii(basestring) -> str\n"
1285  "\n"
1286  "Return an ASCII-only JSON representation of a Python string"
1287 );
1288 
1289 static PyObject *
1290 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
1291 {
1292  /* Return an ASCII-only JSON representation of a Python string */
1293  /* METH_O */
1294  if (PyBytes_Check(pystr)) {
1295  return ascii_escape_str(pystr);
1296  }
1297  else if (PyUnicode_Check(pystr)) {
1298  if (PyUnicode_READY(pystr))
1299  return NULL;
1300  return ascii_escape_unicode(pystr);
1301  }
1302  else {
1303  PyErr_Format(PyExc_TypeError,
1304  "first argument must be a string, not %.80s",
1305  Py_TYPE(pystr)->tp_name);
1306  return NULL;
1307  }
1308 }
1309 
1310 static void
1311 scanner_dealloc(PyObject *self)
1312 {
1313  /* bpo-31095: UnTrack is needed before calling any callbacks */
1314  PyObject_GC_UnTrack(self);
1315  scanner_clear(self);
1316  Py_TYPE(self)->tp_free(self);
1317 }
1318 
1319 static int
1320 scanner_traverse(PyObject *self, visitproc visit, void *arg)
1321 {
1322  PyScannerObject *s;
1323  assert(PyScanner_Check(self));
1324  s = (PyScannerObject *)self;
1325  Py_VISIT(s->encoding);
1326  Py_VISIT(s->strict_bool);
1327  Py_VISIT(s->object_hook);
1328  Py_VISIT(s->pairs_hook);
1329  Py_VISIT(s->parse_float);
1330  Py_VISIT(s->parse_int);
1331  Py_VISIT(s->parse_constant);
1332  Py_VISIT(s->memo);
1333  return 0;
1334 }
1335 
1336 static int
1337 scanner_clear(PyObject *self)
1338 {
1339  PyScannerObject *s;
1340  assert(PyScanner_Check(self));
1341  s = (PyScannerObject *)self;
1342  Py_CLEAR(s->encoding);
1343  Py_CLEAR(s->strict_bool);
1344  Py_CLEAR(s->object_hook);
1345  Py_CLEAR(s->pairs_hook);
1346  Py_CLEAR(s->parse_float);
1347  Py_CLEAR(s->parse_int);
1348  Py_CLEAR(s->parse_constant);
1349  Py_CLEAR(s->memo);
1350  return 0;
1351 }
1352 
1353 #if PY_MAJOR_VERSION < 3
1354 static PyObject *
1355 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1356 {
1357  /* Read a JSON object from PyString pystr.
1358  idx is the index of the first character after the opening curly brace.
1359  *next_idx_ptr is a return-by-reference index to the first character after
1360  the closing curly brace.
1361 
1362  Returns a new PyObject (usually a dict, but object_hook or
1363  object_pairs_hook can change that)
1364  */
1365  char *str = PyString_AS_STRING(pystr);
1366  Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1367  PyObject *rval = NULL;
1368  PyObject *pairs = NULL;
1369  PyObject *item;
1370  PyObject *key = NULL;
1371  PyObject *val = NULL;
1372  char *encoding = PyString_AS_STRING(s->encoding);
1373  int has_pairs_hook = (s->pairs_hook != Py_None);
1374  int did_parse = 0;
1375  Py_ssize_t next_idx;
1376  if (has_pairs_hook) {
1377  pairs = PyList_New(0);
1378  if (pairs == NULL)
1379  return NULL;
1380  }
1381  else {
1382  rval = PyDict_New();
1383  if (rval == NULL)
1384  return NULL;
1385  }
1386 
1387  /* skip whitespace after { */
1388  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1389 
1390  /* only loop if the object is non-empty */
1391  if (idx <= end_idx && str[idx] != '}') {
1392  int trailing_delimiter = 0;
1393  while (idx <= end_idx) {
1394  PyObject *memokey;
1395  trailing_delimiter = 0;
1396 
1397  /* read key */
1398  if (str[idx] != '"') {
1399  raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1400  goto bail;
1401  }
1402  key = scanstring_str(pystr, idx + 1, encoding, s->strict, &next_idx);
1403  if (key == NULL)
1404  goto bail;
1405  memokey = PyDict_GetItem(s->memo, key);
1406  if (memokey != NULL) {
1407  Py_INCREF(memokey);
1408  Py_DECREF(key);
1409  key = memokey;
1410  }
1411  else {
1412  if (PyDict_SetItem(s->memo, key, key) < 0)
1413  goto bail;
1414  }
1415  idx = next_idx;
1416 
1417  /* skip whitespace between key and : delimiter, read :, skip whitespace */
1418  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1419  if (idx > end_idx || str[idx] != ':') {
1421  goto bail;
1422  }
1423  idx++;
1424  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1425 
1426  /* read any JSON data type */
1427  val = scan_once_str(s, pystr, idx, &next_idx);
1428  if (val == NULL)
1429  goto bail;
1430 
1431  if (has_pairs_hook) {
1432  item = PyTuple_Pack(2, key, val);
1433  if (item == NULL)
1434  goto bail;
1435  Py_CLEAR(key);
1436  Py_CLEAR(val);
1437  if (PyList_Append(pairs, item) == -1) {
1438  Py_DECREF(item);
1439  goto bail;
1440  }
1441  Py_DECREF(item);
1442  }
1443  else {
1444  if (PyDict_SetItem(rval, key, val) < 0)
1445  goto bail;
1446  Py_CLEAR(key);
1447  Py_CLEAR(val);
1448  }
1449  idx = next_idx;
1450 
1451  /* skip whitespace before } or , */
1452  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1453 
1454  /* bail if the object is closed or we didn't get the , delimiter */
1455  did_parse = 1;
1456  if (idx > end_idx) break;
1457  if (str[idx] == '}') {
1458  break;
1459  }
1460  else if (str[idx] != ',') {
1461  raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1462  goto bail;
1463  }
1464  idx++;
1465 
1466  /* skip whitespace after , delimiter */
1467  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1468  trailing_delimiter = 1;
1469  }
1470  if (trailing_delimiter) {
1471  raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1472  goto bail;
1473  }
1474  }
1475  /* verify that idx < end_idx, str[idx] should be '}' */
1476  if (idx > end_idx || str[idx] != '}') {
1477  if (did_parse) {
1478  raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1479  } else {
1481  }
1482  goto bail;
1483  }
1484 
1485  /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1486  if (s->pairs_hook != Py_None) {
1487  val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1488  if (val == NULL)
1489  goto bail;
1490  Py_DECREF(pairs);
1491  *next_idx_ptr = idx + 1;
1492  return val;
1493  }
1494 
1495  /* if object_hook is not None: rval = object_hook(rval) */
1496  if (s->object_hook != Py_None) {
1497  val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1498  if (val == NULL)
1499  goto bail;
1500  Py_DECREF(rval);
1501  rval = val;
1502  val = NULL;
1503  }
1504  *next_idx_ptr = idx + 1;
1505  return rval;
1506 bail:
1507  Py_XDECREF(rval);
1508  Py_XDECREF(key);
1509  Py_XDECREF(val);
1510  Py_XDECREF(pairs);
1511  return NULL;
1512 }
1513 #endif /* PY_MAJOR_VERSION < 3 */
1514 
1515 static PyObject *
1516 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1517 {
1518  /* Read a JSON object from PyUnicode pystr.
1519  idx is the index of the first character after the opening curly brace.
1520  *next_idx_ptr is a return-by-reference index to the first character after
1521  the closing curly brace.
1522 
1523  Returns a new PyObject (usually a dict, but object_hook can change that)
1524  */
1525  void *str = PyUnicode_DATA(pystr);
1526  Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
1527  PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1528  PyObject *rval = NULL;
1529  PyObject *pairs = NULL;
1530  PyObject *item;
1531  PyObject *key = NULL;
1532  PyObject *val = NULL;
1533  int has_pairs_hook = (s->pairs_hook != Py_None);
1534  int did_parse = 0;
1535  Py_ssize_t next_idx;
1536 
1537  if (has_pairs_hook) {
1538  pairs = PyList_New(0);
1539  if (pairs == NULL)
1540  return NULL;
1541  }
1542  else {
1543  rval = PyDict_New();
1544  if (rval == NULL)
1545  return NULL;
1546  }
1547 
1548  /* skip whitespace after { */
1549  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1550 
1551  /* only loop if the object is non-empty */
1552  if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
1553  int trailing_delimiter = 0;
1554  while (idx <= end_idx) {
1555  PyObject *memokey;
1556  trailing_delimiter = 0;
1557 
1558  /* read key */
1559  if (PyUnicode_READ(kind, str, idx) != '"') {
1560  raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1561  goto bail;
1562  }
1563  key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
1564  if (key == NULL)
1565  goto bail;
1566  memokey = PyDict_GetItem(s->memo, key);
1567  if (memokey != NULL) {
1568  Py_INCREF(memokey);
1569  Py_DECREF(key);
1570  key = memokey;
1571  }
1572  else {
1573  if (PyDict_SetItem(s->memo, key, key) < 0)
1574  goto bail;
1575  }
1576  idx = next_idx;
1577 
1578  /* skip whitespace between key and : delimiter, read :, skip
1579  whitespace */
1580  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1581  if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
1583  goto bail;
1584  }
1585  idx++;
1586  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1587 
1588  /* read any JSON term */
1589  val = scan_once_unicode(s, pystr, idx, &next_idx);
1590  if (val == NULL)
1591  goto bail;
1592 
1593  if (has_pairs_hook) {
1594  item = PyTuple_Pack(2, key, val);
1595  if (item == NULL)
1596  goto bail;
1597  Py_CLEAR(key);
1598  Py_CLEAR(val);
1599  if (PyList_Append(pairs, item) == -1) {
1600  Py_DECREF(item);
1601  goto bail;
1602  }
1603  Py_DECREF(item);
1604  }
1605  else {
1606  if (PyDict_SetItem(rval, key, val) < 0)
1607  goto bail;
1608  Py_CLEAR(key);
1609  Py_CLEAR(val);
1610  }
1611  idx = next_idx;
1612 
1613  /* skip whitespace before } or , */
1614  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1615 
1616  /* bail if the object is closed or we didn't get the ,
1617  delimiter */
1618  did_parse = 1;
1619  if (idx > end_idx) break;
1620  if (PyUnicode_READ(kind, str, idx) == '}') {
1621  break;
1622  }
1623  else if (PyUnicode_READ(kind, str, idx) != ',') {
1624  raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1625  goto bail;
1626  }
1627  idx++;
1628 
1629  /* skip whitespace after , delimiter */
1630  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1631  trailing_delimiter = 1;
1632  }
1633  if (trailing_delimiter) {
1634  raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1635  goto bail;
1636  }
1637  }
1638 
1639  /* verify that idx < end_idx, str[idx] should be '}' */
1640  if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
1641  if (did_parse) {
1642  raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1643  } else {
1645  }
1646  goto bail;
1647  }
1648 
1649  /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1650  if (s->pairs_hook != Py_None) {
1651  val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1652  if (val == NULL)
1653  goto bail;
1654  Py_DECREF(pairs);
1655  *next_idx_ptr = idx + 1;
1656  return val;
1657  }
1658 
1659  /* if object_hook is not None: rval = object_hook(rval) */
1660  if (s->object_hook != Py_None) {
1661  val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1662  if (val == NULL)
1663  goto bail;
1664  Py_DECREF(rval);
1665  rval = val;
1666  val = NULL;
1667  }
1668  *next_idx_ptr = idx + 1;
1669  return rval;
1670 bail:
1671  Py_XDECREF(rval);
1672  Py_XDECREF(key);
1673  Py_XDECREF(val);
1674  Py_XDECREF(pairs);
1675  return NULL;
1676 }
1677 
1678 #if PY_MAJOR_VERSION < 3
1679 static PyObject *
1680 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1681 {
1682  /* Read a JSON array from PyString pystr.
1683  idx is the index of the first character after the opening brace.
1684  *next_idx_ptr is a return-by-reference index to the first character after
1685  the closing brace.
1686 
1687  Returns a new PyList
1688  */
1689  char *str = PyString_AS_STRING(pystr);
1690  Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1691  PyObject *val = NULL;
1692  PyObject *rval = PyList_New(0);
1693  Py_ssize_t next_idx;
1694  if (rval == NULL)
1695  return NULL;
1696 
1697  /* skip whitespace after [ */
1698  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1699 
1700  /* only loop if the array is non-empty */
1701  if (idx <= end_idx && str[idx] != ']') {
1702  int trailing_delimiter = 0;
1703  while (idx <= end_idx) {
1704  trailing_delimiter = 0;
1705  /* read any JSON term and de-tuplefy the (rval, idx) */
1706  val = scan_once_str(s, pystr, idx, &next_idx);
1707  if (val == NULL) {
1708  goto bail;
1709  }
1710 
1711  if (PyList_Append(rval, val) == -1)
1712  goto bail;
1713 
1714  Py_CLEAR(val);
1715  idx = next_idx;
1716 
1717  /* skip whitespace between term and , */
1718  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1719 
1720  /* bail if the array is closed or we didn't get the , delimiter */
1721  if (idx > end_idx) break;
1722  if (str[idx] == ']') {
1723  break;
1724  }
1725  else if (str[idx] != ',') {
1726  raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1727  goto bail;
1728  }
1729  idx++;
1730 
1731  /* skip whitespace after , */
1732  while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1733  trailing_delimiter = 1;
1734  }
1735  if (trailing_delimiter) {
1736  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1737  goto bail;
1738  }
1739  }
1740 
1741  /* verify that idx < end_idx, str[idx] should be ']' */
1742  if (idx > end_idx || str[idx] != ']') {
1743  if (PyList_GET_SIZE(rval)) {
1744  raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1745  } else {
1746  raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
1747  }
1748  goto bail;
1749  }
1750  *next_idx_ptr = idx + 1;
1751  return rval;
1752 bail:
1753  Py_XDECREF(val);
1754  Py_DECREF(rval);
1755  return NULL;
1756 }
1757 #endif /* PY_MAJOR_VERSION < 3 */
1758 
1759 static PyObject *
1760 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1761 {
1762  /* Read a JSON array from PyString pystr.
1763  idx is the index of the first character after the opening brace.
1764  *next_idx_ptr is a return-by-reference index to the first character after
1765  the closing brace.
1766 
1767  Returns a new PyList
1768  */
1769  PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1770  void *str = PyUnicode_DATA(pystr);
1771  Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
1772  PyObject *val = NULL;
1773  PyObject *rval = PyList_New(0);
1774  Py_ssize_t next_idx;
1775  if (rval == NULL)
1776  return NULL;
1777 
1778  /* skip whitespace after [ */
1779  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1780 
1781  /* only loop if the array is non-empty */
1782  if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
1783  int trailing_delimiter = 0;
1784  while (idx <= end_idx) {
1785  trailing_delimiter = 0;
1786  /* read any JSON term */
1787  val = scan_once_unicode(s, pystr, idx, &next_idx);
1788  if (val == NULL) {
1789  goto bail;
1790  }
1791 
1792  if (PyList_Append(rval, val) == -1)
1793  goto bail;
1794 
1795  Py_CLEAR(val);
1796  idx = next_idx;
1797 
1798  /* skip whitespace between term and , */
1799  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1800 
1801  /* bail if the array is closed or we didn't get the , delimiter */
1802  if (idx > end_idx) break;
1803  if (PyUnicode_READ(kind, str, idx) == ']') {
1804  break;
1805  }
1806  else if (PyUnicode_READ(kind, str, idx) != ',') {
1807  raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1808  goto bail;
1809  }
1810  idx++;
1811 
1812  /* skip whitespace after , */
1813  while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1814  trailing_delimiter = 1;
1815  }
1816  if (trailing_delimiter) {
1817  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1818  goto bail;
1819  }
1820  }
1821 
1822  /* verify that idx < end_idx, str[idx] should be ']' */
1823  if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
1824  if (PyList_GET_SIZE(rval)) {
1825  raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1826  } else {
1827  raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
1828  }
1829  goto bail;
1830  }
1831  *next_idx_ptr = idx + 1;
1832  return rval;
1833 bail:
1834  Py_XDECREF(val);
1835  Py_DECREF(rval);
1836  return NULL;
1837 }
1838 
1839 static PyObject *
1840 _parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1841 {
1842  /* Read a JSON constant from PyString pystr.
1843  constant is the Python string that was found
1844  ("NaN", "Infinity", "-Infinity").
1845  idx is the index of the first character of the constant
1846  *next_idx_ptr is a return-by-reference index to the first character after
1847  the constant.
1848 
1849  Returns the result of parse_constant
1850  */
1851  PyObject *rval;
1852 
1853  /* rval = parse_constant(constant) */
1854  rval = PyObject_CallFunctionObjArgs(s->parse_constant, constant, NULL);
1855  idx += PyString_GET_SIZE(constant);
1856  *next_idx_ptr = idx;
1857  return rval;
1858 }
1859 
1860 #if PY_MAJOR_VERSION < 3
1861 static PyObject *
1862 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
1863 {
1864  /* Read a JSON number from PyString pystr.
1865  idx is the index of the first character of the number
1866  *next_idx_ptr is a return-by-reference index to the first character after
1867  the number.
1868 
1869  Returns a new PyObject representation of that number:
1870  PyInt, PyLong, or PyFloat.
1871  May return other types if parse_int or parse_float are set
1872  */
1873  char *str = PyString_AS_STRING(pystr);
1874  Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1875  Py_ssize_t idx = start;
1876  int is_float = 0;
1877  PyObject *rval;
1878  PyObject *numstr;
1879 
1880  /* read a sign if it's there, make sure it's not the end of the string */
1881  if (str[idx] == '-') {
1882  if (idx >= end_idx) {
1883  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1884  return NULL;
1885  }
1886  idx++;
1887  }
1888 
1889  /* read as many integer digits as we find as long as it doesn't start with 0 */
1890  if (str[idx] >= '1' && str[idx] <= '9') {
1891  idx++;
1892  while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1893  }
1894  /* if it starts with 0 we only expect one integer digit */
1895  else if (str[idx] == '0') {
1896  idx++;
1897  }
1898  /* no integer digits, error */
1899  else {
1900  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1901  return NULL;
1902  }
1903 
1904  /* if the next char is '.' followed by a digit then read all float digits */
1905  if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1906  is_float = 1;
1907  idx += 2;
1908  while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1909  }
1910 
1911  /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1912  if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1913 
1914  /* save the index of the 'e' or 'E' just in case we need to backtrack */
1915  Py_ssize_t e_start = idx;
1916  idx++;
1917 
1918  /* read an exponent sign if present */
1919  if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1920 
1921  /* read all digits */
1922  while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1923 
1924  /* if we got a digit, then parse as float. if not, backtrack */
1925  if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1926  is_float = 1;
1927  }
1928  else {
1929  idx = e_start;
1930  }
1931  }
1932 
1933  /* copy the section we determined to be a number */
1934  numstr = PyString_FromStringAndSize(&str[start], idx - start);
1935  if (numstr == NULL)
1936  return NULL;
1937  if (is_float) {
1938  /* parse as a float using a fast path if available, otherwise call user defined method */
1939  if (s->parse_float != (PyObject *)&PyFloat_Type) {
1940  rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1941  }
1942  else {
1943  /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1944  double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1945  NULL, NULL);
1946  if (d == -1.0 && PyErr_Occurred())
1947  return NULL;
1948  rval = PyFloat_FromDouble(d);
1949  }
1950  }
1951  else {
1952  /* parse as an int using a fast path if available, otherwise call user defined method */
1953  if (s->parse_int != (PyObject *)&PyInt_Type) {
1954  rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1955  }
1956  else {
1957  rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1958  }
1959  }
1960  Py_DECREF(numstr);
1961  *next_idx_ptr = idx;
1962  return rval;
1963 }
1964 #endif /* PY_MAJOR_VERSION < 3 */
1965 
1966 static PyObject *
1967 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
1968 {
1969  /* Read a JSON number from PyUnicode pystr.
1970  idx is the index of the first character of the number
1971  *next_idx_ptr is a return-by-reference index to the first character after
1972  the number.
1973 
1974  Returns a new PyObject representation of that number:
1975  PyInt, PyLong, or PyFloat.
1976  May return other types if parse_int or parse_float are set
1977  */
1978  PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1979  void *str = PyUnicode_DATA(pystr);
1980  Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
1981  Py_ssize_t idx = start;
1982  int is_float = 0;
1983  JSON_UNICHR c;
1984  PyObject *rval;
1985  PyObject *numstr;
1986 
1987  /* read a sign if it's there, make sure it's not the end of the string */
1988  if (PyUnicode_READ(kind, str, idx) == '-') {
1989  if (idx >= end_idx) {
1990  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1991  return NULL;
1992  }
1993  idx++;
1994  }
1995 
1996  /* read as many integer digits as we find as long as it doesn't start with 0 */
1997  c = PyUnicode_READ(kind, str, idx);
1998  if (c == '0') {
1999  /* if it starts with 0 we only expect one integer digit */
2000  idx++;
2001  }
2002  else if (IS_DIGIT(c)) {
2003  idx++;
2004  while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) {
2005  idx++;
2006  }
2007  }
2008  else {
2009  /* no integer digits, error */
2010  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2011  return NULL;
2012  }
2013 
2014  /* if the next char is '.' followed by a digit then read all float digits */
2015  if (idx < end_idx &&
2016  PyUnicode_READ(kind, str, idx) == '.' &&
2017  IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) {
2018  is_float = 1;
2019  idx += 2;
2020  while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
2021  }
2022 
2023  /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
2024  if (idx < end_idx &&
2025  (PyUnicode_READ(kind, str, idx) == 'e' ||
2026  PyUnicode_READ(kind, str, idx) == 'E')) {
2027  Py_ssize_t e_start = idx;
2028  idx++;
2029 
2030  /* read an exponent sign if present */
2031  if (idx < end_idx &&
2032  (PyUnicode_READ(kind, str, idx) == '-' ||
2033  PyUnicode_READ(kind, str, idx) == '+')) idx++;
2034 
2035  /* read all digits */
2036  while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
2037 
2038  /* if we got a digit, then parse as float. if not, backtrack */
2039  if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) {
2040  is_float = 1;
2041  }
2042  else {
2043  idx = e_start;
2044  }
2045  }
2046 
2047  /* copy the section we determined to be a number */
2048 #if PY_MAJOR_VERSION >= 3
2049  numstr = PyUnicode_Substring(pystr, start, idx);
2050 #else
2051  numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start);
2052 #endif
2053  if (numstr == NULL)
2054  return NULL;
2055  if (is_float) {
2056  /* parse as a float using a fast path if available, otherwise call user defined method */
2057  if (s->parse_float != (PyObject *)&PyFloat_Type) {
2058  rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
2059  }
2060  else {
2061 #if PY_MAJOR_VERSION >= 3
2062  rval = PyFloat_FromString(numstr);
2063 #else
2064  rval = PyFloat_FromString(numstr, NULL);
2065 #endif
2066  }
2067  }
2068  else {
2069  /* no fast path for unicode -> int, just call */
2070  rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
2071  }
2072  Py_DECREF(numstr);
2073  *next_idx_ptr = idx;
2074  return rval;
2075 }
2076 
2077 #if PY_MAJOR_VERSION < 3
2078 static PyObject *
2079 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
2080 {
2081  /* Read one JSON term (of any kind) from PyString pystr.
2082  idx is the index of the first character of the term
2083  *next_idx_ptr is a return-by-reference index to the first character after
2084  the number.
2085 
2086  Returns a new PyObject representation of the term.
2087  */
2088  char *str = PyString_AS_STRING(pystr);
2089  Py_ssize_t length = PyString_GET_SIZE(pystr);
2090  PyObject *rval = NULL;
2091  int fallthrough = 0;
2092  if (idx < 0 || idx >= length) {
2093  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2094  return NULL;
2095  }
2096  switch (str[idx]) {
2097  case '"':
2098  /* string */
2099  rval = scanstring_str(pystr, idx + 1,
2100  PyString_AS_STRING(s->encoding),
2101  s->strict,
2102  next_idx_ptr);
2103  break;
2104  case '{':
2105  /* object */
2106  if (Py_EnterRecursiveCall(" while decoding a JSON object "
2107  "from a string"))
2108  return NULL;
2109  rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
2110  Py_LeaveRecursiveCall();
2111  break;
2112  case '[':
2113  /* array */
2114  if (Py_EnterRecursiveCall(" while decoding a JSON array "
2115  "from a string"))
2116  return NULL;
2117  rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
2118  Py_LeaveRecursiveCall();
2119  break;
2120  case 'n':
2121  /* null */
2122  if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
2123  Py_INCREF(Py_None);
2124  *next_idx_ptr = idx + 4;
2125  rval = Py_None;
2126  }
2127  else
2128  fallthrough = 1;
2129  break;
2130  case 't':
2131  /* true */
2132  if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
2133  Py_INCREF(Py_True);
2134  *next_idx_ptr = idx + 4;
2135  rval = Py_True;
2136  }
2137  else
2138  fallthrough = 1;
2139  break;
2140  case 'f':
2141  /* false */
2142  if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
2143  Py_INCREF(Py_False);
2144  *next_idx_ptr = idx + 5;
2145  rval = Py_False;
2146  }
2147  else
2148  fallthrough = 1;
2149  break;
2150  case 'N':
2151  /* NaN */
2152  if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
2153  rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr);
2154  }
2155  else
2156  fallthrough = 1;
2157  break;
2158  case 'I':
2159  /* Infinity */
2160  if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
2161  rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr);
2162  }
2163  else
2164  fallthrough = 1;
2165  break;
2166  case '-':
2167  /* -Infinity */
2168  if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
2169  rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr);
2170  }
2171  else
2172  fallthrough = 1;
2173  break;
2174  default:
2175  fallthrough = 1;
2176  }
2177  /* Didn't find a string, object, array, or named constant. Look for a number. */
2178  if (fallthrough)
2179  rval = _match_number_str(s, pystr, idx, next_idx_ptr);
2180  return rval;
2181 }
2182 #endif /* PY_MAJOR_VERSION < 3 */
2183 
2184 
2185 static PyObject *
2186 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
2187 {
2188  /* Read one JSON term (of any kind) from PyUnicode pystr.
2189  idx is the index of the first character of the term
2190  *next_idx_ptr is a return-by-reference index to the first character after
2191  the number.
2192 
2193  Returns a new PyObject representation of the term.
2194  */
2195  PY2_UNUSED int kind = PyUnicode_KIND(pystr);
2196  void *str = PyUnicode_DATA(pystr);
2197  Py_ssize_t length = PyUnicode_GET_LENGTH(pystr);
2198  PyObject *rval = NULL;
2199  int fallthrough = 0;
2200  if (idx < 0 || idx >= length) {
2201  raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2202  return NULL;
2203  }
2204  switch (PyUnicode_READ(kind, str, idx)) {
2205  case '"':
2206  /* string */
2207  rval = scanstring_unicode(pystr, idx + 1,
2208  s->strict,
2209  next_idx_ptr);
2210  break;
2211  case '{':
2212  /* object */
2213  if (Py_EnterRecursiveCall(" while decoding a JSON object "
2214  "from a unicode string"))
2215  return NULL;
2216  rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
2217  Py_LeaveRecursiveCall();
2218  break;
2219  case '[':
2220  /* array */
2221  if (Py_EnterRecursiveCall(" while decoding a JSON array "
2222  "from a unicode string"))
2223  return NULL;
2224  rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
2225  Py_LeaveRecursiveCall();
2226  break;
2227  case 'n':
2228  /* null */
2229  if ((idx + 3 < length) &&
2230  PyUnicode_READ(kind, str, idx + 1) == 'u' &&
2231  PyUnicode_READ(kind, str, idx + 2) == 'l' &&
2232  PyUnicode_READ(kind, str, idx + 3) == 'l') {
2233  Py_INCREF(Py_None);
2234  *next_idx_ptr = idx + 4;
2235  rval = Py_None;
2236  }
2237  else
2238  fallthrough = 1;
2239  break;
2240  case 't':
2241  /* true */
2242  if ((idx + 3 < length) &&
2243  PyUnicode_READ(kind, str, idx + 1) == 'r' &&
2244  PyUnicode_READ(kind, str, idx + 2) == 'u' &&
2245  PyUnicode_READ(kind, str, idx + 3) == 'e') {
2246  Py_INCREF(Py_True);
2247  *next_idx_ptr = idx + 4;
2248  rval = Py_True;
2249  }
2250  else
2251  fallthrough = 1;
2252  break;
2253  case 'f':
2254  /* false */
2255  if ((idx + 4 < length) &&
2256  PyUnicode_READ(kind, str, idx + 1) == 'a' &&
2257  PyUnicode_READ(kind, str, idx + 2) == 'l' &&
2258  PyUnicode_READ(kind, str, idx + 3) == 's' &&
2259  PyUnicode_READ(kind, str, idx + 4) == 'e') {
2260  Py_INCREF(Py_False);
2261  *next_idx_ptr = idx + 5;
2262  rval = Py_False;
2263  }
2264  else
2265  fallthrough = 1;
2266  break;
2267  case 'N':
2268  /* NaN */
2269  if ((idx + 2 < length) &&
2270  PyUnicode_READ(kind, str, idx + 1) == 'a' &&
2271  PyUnicode_READ(kind, str, idx + 2) == 'N') {
2272  rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr);
2273  }
2274  else
2275  fallthrough = 1;
2276  break;
2277  case 'I':
2278  /* Infinity */
2279  if ((idx + 7 < length) &&
2280  PyUnicode_READ(kind, str, idx + 1) == 'n' &&
2281  PyUnicode_READ(kind, str, idx + 2) == 'f' &&
2282  PyUnicode_READ(kind, str, idx + 3) == 'i' &&
2283  PyUnicode_READ(kind, str, idx + 4) == 'n' &&
2284  PyUnicode_READ(kind, str, idx + 5) == 'i' &&
2285  PyUnicode_READ(kind, str, idx + 6) == 't' &&
2286  PyUnicode_READ(kind, str, idx + 7) == 'y') {
2287  rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr);
2288  }
2289  else
2290  fallthrough = 1;
2291  break;
2292  case '-':
2293  /* -Infinity */
2294  if ((idx + 8 < length) &&
2295  PyUnicode_READ(kind, str, idx + 1) == 'I' &&
2296  PyUnicode_READ(kind, str, idx + 2) == 'n' &&
2297  PyUnicode_READ(kind, str, idx + 3) == 'f' &&
2298  PyUnicode_READ(kind, str, idx + 4) == 'i' &&
2299  PyUnicode_READ(kind, str, idx + 5) == 'n' &&
2300  PyUnicode_READ(kind, str, idx + 6) == 'i' &&
2301  PyUnicode_READ(kind, str, idx + 7) == 't' &&
2302  PyUnicode_READ(kind, str, idx + 8) == 'y') {
2303  rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr);
2304  }
2305  else
2306  fallthrough = 1;
2307  break;
2308  default:
2309  fallthrough = 1;
2310  }
2311  /* Didn't find a string, object, array, or named constant. Look for a number. */
2312  if (fallthrough)
2313  rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
2314  return rval;
2315 }
2316 
2317 static PyObject *
2318 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
2319 {
2320  /* Python callable interface to scan_once_{str,unicode} */
2321  PyObject *pystr;
2322  PyObject *rval;
2323  Py_ssize_t idx;
2324  Py_ssize_t next_idx = -1;
2325  static char *kwlist[] = {"string", "idx", NULL};
2326  PyScannerObject *s;
2327  assert(PyScanner_Check(self));
2328  s = (PyScannerObject *)self;
2329  if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
2330  return NULL;
2331 
2332  if (PyUnicode_Check(pystr)) {
2333  if (PyUnicode_READY(pystr))
2334  return NULL;
2335  rval = scan_once_unicode(s, pystr, idx, &next_idx);
2336  }
2337 #if PY_MAJOR_VERSION < 3
2338  else if (PyString_Check(pystr)) {
2339  rval = scan_once_str(s, pystr, idx, &next_idx);
2340  }
2341 #endif /* PY_MAJOR_VERSION < 3 */
2342  else {
2343  PyErr_Format(PyExc_TypeError,
2344  "first argument must be a string, not %.80s",
2345  Py_TYPE(pystr)->tp_name);
2346  return NULL;
2347  }
2348  PyDict_Clear(s->memo);
2349  return _build_rval_index_tuple(rval, next_idx);
2350 }
2351 
2352 static PyObject *
2353 JSON_ParseEncoding(PyObject *encoding)
2354 {
2355  if (encoding == Py_None)
2357 #if PY_MAJOR_VERSION >= 3
2358  if (PyUnicode_Check(encoding)) {
2359  if (PyUnicode_AsUTF8(encoding) == NULL) {
2360  return NULL;
2361  }
2362  Py_INCREF(encoding);
2363  return encoding;
2364  }
2365 #else /* PY_MAJOR_VERSION >= 3 */
2366  if (PyString_Check(encoding)) {
2367  Py_INCREF(encoding);
2368  return encoding;
2369  }
2370  if (PyUnicode_Check(encoding))
2371  return PyUnicode_AsEncodedString(encoding, NULL, NULL);
2372 #endif /* PY_MAJOR_VERSION >= 3 */
2373  PyErr_SetString(PyExc_TypeError, "encoding must be a string");
2374  return NULL;
2375 }
2376 
2377 static PyObject *
2378 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2379 {
2380  /* Initialize Scanner object */
2381  PyObject *ctx;
2382  static char *kwlist[] = {"context", NULL};
2383  PyScannerObject *s;
2384  PyObject *encoding;
2385 
2386  if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
2387  return NULL;
2388 
2389  s = (PyScannerObject *)type->tp_alloc(type, 0);
2390  if (s == NULL)
2391  return NULL;
2392 
2393  if (s->memo == NULL) {
2394  s->memo = PyDict_New();
2395  if (s->memo == NULL)
2396  goto bail;
2397  }
2398 
2399  encoding = PyObject_GetAttrString(ctx, "encoding");
2400  if (encoding == NULL)
2401  goto bail;
2402  s->encoding = JSON_ParseEncoding(encoding);
2403  Py_XDECREF(encoding);
2404  if (s->encoding == NULL)
2405  goto bail;
2406 
2407  /* All of these will fail "gracefully" so we don't need to verify them */
2408  s->strict_bool = PyObject_GetAttrString(ctx, "strict");
2409  if (s->strict_bool == NULL)
2410  goto bail;
2411  s->strict = PyObject_IsTrue(s->strict_bool);
2412  if (s->strict < 0)
2413  goto bail;
2414  s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
2415  if (s->object_hook == NULL)
2416  goto bail;
2417  s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
2418  if (s->pairs_hook == NULL)
2419  goto bail;
2420  s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
2421  if (s->parse_float == NULL)
2422  goto bail;
2423  s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
2424  if (s->parse_int == NULL)
2425  goto bail;
2426  s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
2427  if (s->parse_constant == NULL)
2428  goto bail;
2429 
2430  return (PyObject *)s;
2431 
2432 bail:
2433  Py_DECREF(s);
2434  return NULL;
2435 }
2436 
/* Docstring placed in the tp_doc slot of PyScannerType below. */
2437 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
2438 
/* Type object for the scanner.  Slots are given positionally; the trailing
   comment on each line names the slot it fills.  The type is callable
   (scanner_call), constructed through scanner_new, and takes part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with scanner_traverse and
   scanner_clear). */
2439 static
2440 PyTypeObject PyScannerType = {
2441  PyVarObject_HEAD_INIT(NULL, 0)
2442  "simplejson._speedups.Scanner", /* tp_name */
2443  sizeof(PyScannerObject), /* tp_basicsize */
2444  0, /* tp_itemsize */
2445  scanner_dealloc, /* tp_dealloc */
2446  0, /* tp_print */
2447  0, /* tp_getattr */
2448  0, /* tp_setattr */
2449  0, /* tp_compare */
2450  0, /* tp_repr */
2451  0, /* tp_as_number */
2452  0, /* tp_as_sequence */
2453  0, /* tp_as_mapping */
2454  0, /* tp_hash */
2455  scanner_call, /* tp_call */
2456  0, /* tp_str */
2457  0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
2458  0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
2459  0, /* tp_as_buffer */
2460  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2461  scanner_doc, /* tp_doc */
2462  scanner_traverse, /* tp_traverse */
2463  scanner_clear, /* tp_clear */
2464  0, /* tp_richcompare */
2465  0, /* tp_weaklistoffset */
2466  0, /* tp_iter */
2467  0, /* tp_iternext */
2468  0, /* tp_methods */
2469  scanner_members, /* tp_members */
2470  0, /* tp_getset */
2471  0, /* tp_base */
2472  0, /* tp_dict */
2473  0, /* tp_descr_get */
2474  0, /* tp_descr_set */
2475  0, /* tp_dictoffset */
2476  0, /* tp_init */
2477  0,/* PyType_GenericAlloc, */ /* tp_alloc */
2478  scanner_new, /* tp_new */
2479  0,/* PyObject_GC_Del, */ /* tp_free */
2480 };
2481 
2482 static PyObject *
2483 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2484 {
2485  static char *kwlist[] = {
2486  "markers",
2487  "default",
2488  "encoder",
2489  "indent",
2490  "key_separator",
2491  "item_separator",
2492  "sort_keys",
2493  "skipkeys",
2494  "allow_nan",
2495  "key_memo",
2496  "use_decimal",
2497  "namedtuple_as_object",
2498  "tuple_as_array",
2499  "int_as_string_bitcount",
2500  "item_sort_key",
2501  "encoding",
2502  "for_json",
2503  "ignore_nan",
2504  "Decimal",
2505  "iterable_as_array",
2506  NULL};
2507 
2508  PyEncoderObject *s;
2509  PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
2510  PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
2511  PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array;
2512  PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json;
2513  PyObject *ignore_nan, *Decimal;
2514  int is_true;
2515 
2516  if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOOO:make_encoder", kwlist,
2517  &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
2518  &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
2519  &namedtuple_as_object, &tuple_as_array,
2520  &int_as_string_bitcount, &item_sort_key, &encoding, &for_json,
2521  &ignore_nan, &Decimal, &iterable_as_array))
2522  return NULL;
2523 
2524  s = (PyEncoderObject *)type->tp_alloc(type, 0);
2525  if (s == NULL)
2526  return NULL;
2527 
2528  Py_INCREF(markers);
2529  s->markers = markers;
2530  Py_INCREF(defaultfn);
2531  s->defaultfn = defaultfn;
2532  Py_INCREF(encoder);
2533  s->encoder = encoder;
2534 #if PY_MAJOR_VERSION >= 3
2535  if (encoding == Py_None) {
2536  s->encoding = NULL;
2537  }
2538  else
2539 #endif /* PY_MAJOR_VERSION >= 3 */
2540  {
2541  s->encoding = JSON_ParseEncoding(encoding);
2542  if (s->encoding == NULL)
2543  goto bail;
2544  }
2545  Py_INCREF(indent);
2546  s->indent = indent;
2547  Py_INCREF(key_separator);
2548  s->key_separator = key_separator;
2549  Py_INCREF(item_separator);
2550  s->item_separator = item_separator;
2551  Py_INCREF(skipkeys);
2552  s->skipkeys_bool = skipkeys;
2553  s->skipkeys = PyObject_IsTrue(skipkeys);
2554  if (s->skipkeys < 0)
2555  goto bail;
2556  Py_INCREF(key_memo);
2557  s->key_memo = key_memo;
2558  s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
2559  is_true = PyObject_IsTrue(ignore_nan);
2560  if (is_true < 0)
2561  goto bail;
2562  s->allow_or_ignore_nan = is_true ? JSON_IGNORE_NAN : 0;
2563  is_true = PyObject_IsTrue(allow_nan);
2564  if (is_true < 0)
2565  goto bail;
2566  s->allow_or_ignore_nan |= is_true ? JSON_ALLOW_NAN : 0;
2567  s->use_decimal = PyObject_IsTrue(use_decimal);
2568  if (s->use_decimal < 0)
2569  goto bail;
2570  s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
2571  if (s->namedtuple_as_object < 0)
2572  goto bail;
2573  s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
2574  if (s->tuple_as_array < 0)
2575  goto bail;
2576  s->iterable_as_array = PyObject_IsTrue(iterable_as_array);
2577  if (s->iterable_as_array < 0)
2578  goto bail;
2579  if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) {
2580  static const unsigned long long_long_bitsize = SIZEOF_LONG_LONG * 8;
2581  long int_as_string_bitcount_val = PyLong_AsLong(int_as_string_bitcount);
2582  if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < (long)long_long_bitsize) {
2583  s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << (int)int_as_string_bitcount_val);
2584  s->min_long_size = PyLong_FromLongLong(-1LL << (int)int_as_string_bitcount_val);
2585  if (s->min_long_size == NULL || s->max_long_size == NULL) {
2586  goto bail;
2587  }
2588  }
2589  else {
2590  PyErr_Format(PyExc_TypeError,
2591  "int_as_string_bitcount (%ld) must be greater than 0 and less than the number of bits of a `long long` type (%lu bits)",
2592  int_as_string_bitcount_val, long_long_bitsize);
2593  goto bail;
2594  }
2595  }
2596  else if (int_as_string_bitcount == Py_None) {
2597  Py_INCREF(Py_None);
2598  s->max_long_size = Py_None;
2599  Py_INCREF(Py_None);
2600  s->min_long_size = Py_None;
2601  }
2602  else {
2603  PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer");
2604  goto bail;
2605  }
2606  if (item_sort_key != Py_None) {
2607  if (!PyCallable_Check(item_sort_key)) {
2608  PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable");
2609  goto bail;
2610  }
2611  }
2612  else {
2613  is_true = PyObject_IsTrue(sort_keys);
2614  if (is_true < 0)
2615  goto bail;
2616  if (is_true) {
2617  static PyObject *itemgetter0 = NULL;
2618  if (!itemgetter0) {
2619  PyObject *operator = PyImport_ImportModule("operator");
2620  if (!operator)
2621  goto bail;
2622  itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0);
2623  Py_DECREF(operator);
2624  }
2625  item_sort_key = itemgetter0;
2626  if (!item_sort_key)
2627  goto bail;
2628  }
2629  }
2630  if (item_sort_key == Py_None) {
2631  Py_INCREF(Py_None);
2632  s->item_sort_kw = Py_None;
2633  }
2634  else {
2635  s->item_sort_kw = PyDict_New();
2636  if (s->item_sort_kw == NULL)
2637  goto bail;
2638  if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key))
2639  goto bail;
2640  }
2641  Py_INCREF(sort_keys);
2642  s->sort_keys = sort_keys;
2643  Py_INCREF(item_sort_key);
2644  s->item_sort_key = item_sort_key;
2645  Py_INCREF(Decimal);
2646  s->Decimal = Decimal;
2647  s->for_json = PyObject_IsTrue(for_json);
2648  if (s->for_json < 0)
2649  goto bail;
2650 
2651  return (PyObject *)s;
2652 
2653 bail:
2654  Py_DECREF(s);
2655  return NULL;
2656 }
2657 
2658 static PyObject *
2659 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
2660 {
2661  /* Python callable interface to encode_listencode_obj */
2662  static char *kwlist[] = {"obj", "_current_indent_level", NULL};
2663  PyObject *obj;
2664  Py_ssize_t indent_level;
2665  PyEncoderObject *s;
2666  JSON_Accu rval;
2667  assert(PyEncoder_Check(self));
2668  s = (PyEncoderObject *)self;
2669  if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2670  &obj, _convertPyInt_AsSsize_t, &indent_level))
2671  return NULL;
2672  if (JSON_Accu_Init(&rval))
2673  return NULL;
2674  if (encoder_listencode_obj(s, &rval, obj, indent_level)) {
2675  JSON_Accu_Destroy(&rval);
2676  return NULL;
2677  }
2678  return JSON_Accu_FinishAsList(&rval);
2679 }
2680 
2681 static PyObject *
2682 _encoded_const(PyObject *obj)
2683 {
2684  /* Return the JSON string representation of None, True, False */
2685  if (obj == Py_None) {
2686  static PyObject *s_null = NULL;
2687  if (s_null == NULL) {
2688  s_null = JSON_InternFromString("null");
2689  }
2690  Py_INCREF(s_null);
2691  return s_null;
2692  }
2693  else if (obj == Py_True) {
2694  static PyObject *s_true = NULL;
2695  if (s_true == NULL) {
2696  s_true = JSON_InternFromString("true");
2697  }
2698  Py_INCREF(s_true);
2699  return s_true;
2700  }
2701  else if (obj == Py_False) {
2702  static PyObject *s_false = NULL;
2703  if (s_false == NULL) {
2704  s_false = JSON_InternFromString("false");
2705  }
2706  Py_INCREF(s_false);
2707  return s_false;
2708  }
2709  else {
2710  PyErr_SetString(PyExc_ValueError, "not a const");
2711  return NULL;
2712  }
2713 }
2714 
2715 static PyObject *
2717 {
2718  /* Return the JSON representation of a PyFloat */
2719  double i = PyFloat_AS_DOUBLE(obj);
2720  if (!Py_IS_FINITE(i)) {
2721  if (!s->allow_or_ignore_nan) {
2722  PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2723  return NULL;
2724  }
2726  return _encoded_const(Py_None);
2727  }
2728  /* JSON_ALLOW_NAN is set */
2729  else if (i > 0) {
2730  Py_INCREF(JSON_Infinity);
2731  return JSON_Infinity;
2732  }
2733  else if (i < 0) {
2734  Py_INCREF(JSON_NegInfinity);
2735  return JSON_NegInfinity;
2736  }
2737  else {
2738  Py_INCREF(JSON_NaN);
2739  return JSON_NaN;
2740  }
2741  }
2742  /* Use a better float format here? */
2743  if (PyFloat_CheckExact(obj)) {
2744  return PyObject_Repr(obj);
2745  }
2746  else {
2747  /* See #118, do not trust custom str/repr */
2748  PyObject *res;
2749  PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyFloat_Type, obj, NULL);
2750  if (tmp == NULL) {
2751  return NULL;
2752  }
2753  res = PyObject_Repr(tmp);
2754  Py_DECREF(tmp);
2755  return res;
2756  }
2757 }
2758 
2759 static PyObject *
2761 {
2762  /* Return the JSON representation of a string */
2763  PyObject *encoded;
2764 
2765  if (s->fast_encode) {
2766  return py_encode_basestring_ascii(NULL, obj);
2767  }
2768  encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2769  if (encoded != NULL &&
2770 #if PY_MAJOR_VERSION < 3
2771  !PyString_Check(encoded) &&
2772 #endif /* PY_MAJOR_VERSION < 3 */
2773  !PyUnicode_Check(encoded))
2774  {
2775  PyErr_Format(PyExc_TypeError,
2776  "encoder() must return a string, not %.80s",
2777  Py_TYPE(encoded)->tp_name);
2778  Py_DECREF(encoded);
2779  return NULL;
2780  }
2781  return encoded;
2782 }
2783 
2784 static int
2785 _steal_accumulate(JSON_Accu *accu, PyObject *stolen)
2786 {
2787  /* Append stolen and then decrement its reference count */
2788  int rval = JSON_Accu_Accumulate(accu, stolen);
2789  Py_DECREF(stolen);
2790  return rval;
2791 }
2792 
2793 static int
2794 encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
2795 {
2796  /* Encode Python object obj to a JSON term, rval is a PyList */
2797  int rv = -1;
2798  do {
2799  if (obj == Py_None || obj == Py_True || obj == Py_False) {
2800  PyObject *cstr = _encoded_const(obj);
2801  if (cstr != NULL)
2802  rv = _steal_accumulate(rval, cstr);
2803  }
2804  else if ((PyBytes_Check(obj) && s->encoding != NULL) ||
2805  PyUnicode_Check(obj))
2806  {
2807  PyObject *encoded = encoder_encode_string(s, obj);
2808  if (encoded != NULL)
2809  rv = _steal_accumulate(rval, encoded);
2810  }
2811  else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2812  PyObject *encoded;
2813  if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) {
2814  encoded = PyObject_Str(obj);
2815  }
2816  else {
2817  /* See #118, do not trust custom str/repr */
2818  PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, obj, NULL);
2819  if (tmp == NULL) {
2820  encoded = NULL;
2821  }
2822  else {
2823  encoded = PyObject_Str(tmp);
2824  Py_DECREF(tmp);
2825  }
2826  }
2827  if (encoded != NULL) {
2828  encoded = maybe_quote_bigint(s, encoded, obj);
2829  if (encoded == NULL)
2830  break;
2831  rv = _steal_accumulate(rval, encoded);
2832  }
2833  }
2834  else if (PyFloat_Check(obj)) {
2835  PyObject *encoded = encoder_encode_float(s, obj);
2836  if (encoded != NULL)
2837  rv = _steal_accumulate(rval, encoded);
2838  }
2839  else if (s->for_json && _has_for_json_hook(obj)) {
2840  PyObject *newobj;
2841  if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2842  return rv;
2843  newobj = PyObject_CallMethod(obj, "for_json", NULL);
2844  if (newobj != NULL) {
2845  rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2846  Py_DECREF(newobj);
2847  }
2848  Py_LeaveRecursiveCall();
2849  }
2850  else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
2851  PyObject *newobj;
2852  if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2853  return rv;
2854  newobj = PyObject_CallMethod(obj, "_asdict", NULL);
2855  if (newobj != NULL) {
2856  rv = encoder_listencode_dict(s, rval, newobj, indent_level);
2857  Py_DECREF(newobj);
2858  }
2859  Py_LeaveRecursiveCall();
2860  }
2861  else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
2862  if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2863  return rv;
2864  rv = encoder_listencode_list(s, rval, obj, indent_level);
2865  Py_LeaveRecursiveCall();
2866  }
2867  else if (PyDict_Check(obj)) {
2868  if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2869  return rv;
2870  rv = encoder_listencode_dict(s, rval, obj, indent_level);
2871  Py_LeaveRecursiveCall();
2872  }
2873  else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) {
2874  PyObject *encoded = PyObject_Str(obj);
2875  if (encoded != NULL)
2876  rv = _steal_accumulate(rval, encoded);
2877  }
2878  else if (is_raw_json(obj))
2879  {
2880  PyObject *encoded = PyObject_GetAttrString(obj, "encoded_json");
2881  if (encoded != NULL)
2882  rv = _steal_accumulate(rval, encoded);
2883  }
2884  else {
2885  PyObject *ident = NULL;
2886  PyObject *newobj;
2887  if (s->iterable_as_array) {
2888  newobj = PyObject_GetIter(obj);
2889  if (newobj == NULL)
2890  PyErr_Clear();
2891  else {
2892  rv = encoder_listencode_list(s, rval, newobj, indent_level);
2893  Py_DECREF(newobj);
2894  break;
2895  }
2896  }
2897  if (s->markers != Py_None) {
2898  int has_key;
2899  ident = PyLong_FromVoidPtr(obj);
2900  if (ident == NULL)
2901  break;
2902  has_key = PyDict_Contains(s->markers, ident);
2903  if (has_key) {
2904  if (has_key != -1)
2905  PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2906  Py_DECREF(ident);
2907  break;
2908  }
2909  if (PyDict_SetItem(s->markers, ident, obj)) {
2910  Py_DECREF(ident);
2911  break;
2912  }
2913  }
2914  if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2915  return rv;
2916  newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2917  if (newobj == NULL) {
2918  Py_XDECREF(ident);
2919  Py_LeaveRecursiveCall();
2920  break;
2921  }
2922  rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2923  Py_LeaveRecursiveCall();
2924  Py_DECREF(newobj);
2925  if (rv) {
2926  Py_XDECREF(ident);
2927  rv = -1;
2928  }
2929  else if (ident != NULL) {
2930  if (PyDict_DelItem(s->markers, ident)) {
2931  Py_XDECREF(ident);
2932  rv = -1;
2933  }
2934  Py_XDECREF(ident);
2935  }
2936  }
2937  } while (0);
2938  return rv;
2939 }
2940 
2941 static int
2942 encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level)
2943 {
2944  /* Encode Python dict dct a JSON term */
2945  static PyObject *open_dict = NULL;
2946  static PyObject *close_dict = NULL;
2947  static PyObject *empty_dict = NULL;
2948  PyObject *kstr = NULL;
2949  PyObject *ident = NULL;
2950  PyObject *iter = NULL;
2951  PyObject *item = NULL;
2952  PyObject *items = NULL;
2953  PyObject *encoded = NULL;
2954  Py_ssize_t idx;
2955 
2956  if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2957  open_dict = JSON_InternFromString("{");
2958  close_dict = JSON_InternFromString("}");
2959  empty_dict = JSON_InternFromString("{}");
2960  if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2961  return -1;
2962  }
2963  if (PyDict_Size(dct) == 0)
2964  return JSON_Accu_Accumulate(rval, empty_dict);
2965 
2966  if (s->markers != Py_None) {
2967  int has_key;
2968  ident = PyLong_FromVoidPtr(dct);
2969  if (ident == NULL)
2970  goto bail;
2971  has_key = PyDict_Contains(s->markers, ident);
2972  if (has_key) {
2973  if (has_key != -1)
2974  PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2975  goto bail;
2976  }
2977  if (PyDict_SetItem(s->markers, ident, dct)) {
2978  goto bail;
2979  }
2980  }
2981 
2982  if (JSON_Accu_Accumulate(rval, open_dict))
2983  goto bail;
2984 
2985  if (s->indent != Py_None) {
2986  /* TODO: DOES NOT RUN */
2987  indent_level += 1;
2988  /*
2989  newline_indent = '\n' + (_indent * _current_indent_level)
2990  separator = _item_separator + newline_indent
2991  buf += newline_indent
2992  */
2993  }
2994 
2995  iter = encoder_dict_iteritems(s, dct);
2996  if (iter == NULL)
2997  goto bail;
2998 
2999  idx = 0;
3000  while ((item = PyIter_Next(iter))) {
3001  PyObject *encoded, *key, *value;
3002  if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
3003  PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
3004  goto bail;
3005  }
3006  key = PyTuple_GET_ITEM(item, 0);
3007  if (key == NULL)
3008  goto bail;
3009  value = PyTuple_GET_ITEM(item, 1);
3010  if (value == NULL)
3011  goto bail;
3012 
3013  encoded = PyDict_GetItem(s->key_memo, key);
3014  if (encoded != NULL) {
3015  Py_INCREF(encoded);
3016  } else {
3017  kstr = encoder_stringify_key(s, key);
3018  if (kstr == NULL)
3019  goto bail;
3020  else if (kstr == Py_None) {
3021  /* skipkeys */
3022  Py_DECREF(item);
3023  Py_DECREF(kstr);
3024  continue;
3025  }
3026  }
3027  if (idx) {
3028  if (JSON_Accu_Accumulate(rval, s->item_separator))
3029  goto bail;
3030  }
3031  if (encoded == NULL) {
3032  encoded = encoder_encode_string(s, kstr);
3033  Py_CLEAR(kstr);
3034  if (encoded == NULL)
3035  goto bail;
3036  if (PyDict_SetItem(s->key_memo, key, encoded))
3037  goto bail;
3038  }
3039  if (JSON_Accu_Accumulate(rval, encoded)) {
3040  goto bail;
3041  }
3042  Py_CLEAR(encoded);
3043  if (JSON_Accu_Accumulate(rval, s->key_separator))
3044  goto bail;
3045  if (encoder_listencode_obj(s, rval, value, indent_level))
3046  goto bail;
3047  Py_CLEAR(item);
3048  idx += 1;
3049  }
3050  Py_CLEAR(iter);
3051  if (PyErr_Occurred())
3052  goto bail;
3053  if (ident != NULL) {
3054  if (PyDict_DelItem(s->markers, ident))
3055  goto bail;
3056  Py_CLEAR(ident);
3057  }
3058  if (s->indent != Py_None) {
3059  /* TODO: DOES NOT RUN */
3060  indent_level -= 1;
3061  /*
3062  yield '\n' + (_indent * _current_indent_level)
3063  */
3064  }
3065  if (JSON_Accu_Accumulate(rval, close_dict))
3066  goto bail;
3067  return 0;
3068 
3069 bail:
3070  Py_XDECREF(encoded);
3071  Py_XDECREF(items);
3072  Py_XDECREF(item);
3073  Py_XDECREF(iter);
3074  Py_XDECREF(kstr);
3075  Py_XDECREF(ident);
3076  return -1;
3077 }
3078 
3079 
3080 static int
3081 encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level)
3082 {
3083  /* Encode Python list seq to a JSON term */
3084  static PyObject *open_array = NULL;
3085  static PyObject *close_array = NULL;
3086  static PyObject *empty_array = NULL;
3087  PyObject *ident = NULL;
3088  PyObject *iter = NULL;
3089  PyObject *obj = NULL;
3090  int is_true;
3091  int i = 0;
3092 
3093  if (open_array == NULL || close_array == NULL || empty_array == NULL) {
3094  open_array = JSON_InternFromString("[");
3095  close_array = JSON_InternFromString("]");
3096  empty_array = JSON_InternFromString("[]");
3097  if (open_array == NULL || close_array == NULL || empty_array == NULL)
3098  return -1;
3099  }
3100  ident = NULL;
3101  is_true = PyObject_IsTrue(seq);
3102  if (is_true == -1)
3103  return -1;
3104  else if (is_true == 0)
3105  return JSON_Accu_Accumulate(rval, empty_array);
3106 
3107  if (s->markers != Py_None) {
3108  int has_key;
3109  ident = PyLong_FromVoidPtr(seq);
3110  if (ident == NULL)
3111  goto bail;
3112  has_key = PyDict_Contains(s->markers, ident);
3113  if (has_key) {
3114  if (has_key != -1)
3115  PyErr_SetString(PyExc_ValueError, "Circular reference detected");
3116  goto bail;
3117  }
3118  if (PyDict_SetItem(s->markers, ident, seq)) {
3119  goto bail;
3120  }
3121  }
3122 
3123  iter = PyObject_GetIter(seq);
3124  if (iter == NULL)
3125  goto bail;
3126 
3127  if (JSON_Accu_Accumulate(rval, open_array))
3128  goto bail;
3129  if (s->indent != Py_None) {
3130  /* TODO: DOES NOT RUN */
3131  indent_level += 1;
3132  /*
3133  newline_indent = '\n' + (_indent * _current_indent_level)
3134  separator = _item_separator + newline_indent
3135  buf += newline_indent
3136  */
3137  }
3138  while ((obj = PyIter_Next(iter))) {
3139  if (i) {
3140  if (JSON_Accu_Accumulate(rval, s->item_separator))
3141  goto bail;
3142  }
3143  if (encoder_listencode_obj(s, rval, obj, indent_level))
3144  goto bail;
3145  i++;
3146  Py_CLEAR(obj);
3147  }
3148  Py_CLEAR(iter);
3149  if (PyErr_Occurred())
3150  goto bail;
3151  if (ident != NULL) {
3152  if (PyDict_DelItem(s->markers, ident))
3153  goto bail;
3154  Py_CLEAR(ident);
3155  }
3156  if (s->indent != Py_None) {
3157  /* TODO: DOES NOT RUN */
3158  indent_level -= 1;
3159  /*
3160  yield '\n' + (_indent * _current_indent_level)
3161  */
3162  }
3163  if (JSON_Accu_Accumulate(rval, close_array))
3164  goto bail;
3165  return 0;
3166 
3167 bail:
3168  Py_XDECREF(obj);
3169  Py_XDECREF(iter);
3170  Py_XDECREF(ident);
3171  return -1;
3172 }
3173 
/* tp_dealloc slot of PyEncoderType: untrack the object from the garbage
   collector, drop the references it holds via encoder_clear, then hand the
   memory to the type's tp_free.  The order of the three steps matters. */
3174 static void
3175 encoder_dealloc(PyObject *self)
3176 {
3177  /* bpo-31095: UnTrack is needed before calling any callbacks */
3178  PyObject_GC_UnTrack(self);
3179  encoder_clear(self);
3180  Py_TYPE(self)->tp_free(self);
3181 }
3182 
3183 static int
3184 encoder_traverse(PyObject *self, visitproc visit, void *arg)
3185 {
3186  PyEncoderObject *s;
3187  assert(PyEncoder_Check(self));
3188  s = (PyEncoderObject *)self;
3189  Py_VISIT(s->markers);
3190  Py_VISIT(s->defaultfn);
3191  Py_VISIT(s->encoder);
3192  Py_VISIT(s->encoding);
3193  Py_VISIT(s->indent);
3194  Py_VISIT(s->key_separator);
3195  Py_VISIT(s->item_separator);
3196  Py_VISIT(s->key_memo);
3197  Py_VISIT(s->sort_keys);
3198  Py_VISIT(s->item_sort_kw);
3199  Py_VISIT(s->item_sort_key);
3200  Py_VISIT(s->max_long_size);
3201  Py_VISIT(s->min_long_size);
3202  Py_VISIT(s->Decimal);
3203  return 0;
3204 }
3205 
/* tp_clear slot of PyEncoderType, also called from encoder_dealloc. */
3206 static int
3207 encoder_clear(PyObject *self)
3208 {
3209  /* Release the object references listed below and reset each field to
      NULL.  The memory itself is freed by encoder_dealloc, not here.
      Always returns 0. */
3210  PyEncoderObject *s;
3211  assert(PyEncoder_Check(self));
3212  s = (PyEncoderObject *)self;
3213  Py_CLEAR(s->markers);
3214  Py_CLEAR(s->defaultfn);
3215  Py_CLEAR(s->encoder);
3216  Py_CLEAR(s->encoding);
3217  Py_CLEAR(s->indent);
3218  Py_CLEAR(s->key_separator);
3219  Py_CLEAR(s->item_separator);
3220  Py_CLEAR(s->key_memo);
3221  Py_CLEAR(s->skipkeys_bool);
3222  Py_CLEAR(s->sort_keys);
3223  Py_CLEAR(s->item_sort_kw);
3224  Py_CLEAR(s->item_sort_key);
3225  Py_CLEAR(s->max_long_size);
3226  Py_CLEAR(s->min_long_size);
3227  Py_CLEAR(s->Decimal);
3228  return 0;
3229 }
3230 
/* Docstring installed as tp_doc of the Encoder type below. */
3231 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
3232
/*
 * Static type object for "simplejson._speedups.Encoder".
 *
 * Instances are PyEncoderObject structs, are callable (tp_call is
 * encoder_call), are created through encoder_new, and take part in cyclic
 * garbage collection (Py_TPFLAGS_HAVE_GC with encoder_traverse /
 * encoder_clear). Attributes are exposed through the encoder_members table.
 *
 * The initializer is positional; the trailing comments name the slot each
 * value is meant for.
 * NOTE(review): the slot names in the comments (tp_print, tp_compare) look
 * like the Python 2 layout names -- confirm against the targeted headers.
 */
3233 static
3234 PyTypeObject PyEncoderType = {
3235  PyVarObject_HEAD_INIT(NULL, 0)
3236  "simplejson._speedups.Encoder", /* tp_name */
3237  sizeof(PyEncoderObject), /* tp_basicsize */
3238  0, /* tp_itemsize */
3239  encoder_dealloc, /* tp_dealloc */
3240  0, /* tp_print */
3241  0, /* tp_getattr */
3242  0, /* tp_setattr */
3243  0, /* tp_compare */
3244  0, /* tp_repr */
3245  0, /* tp_as_number */
3246  0, /* tp_as_sequence */
3247  0, /* tp_as_mapping */
3248  0, /* tp_hash */
3249  encoder_call, /* tp_call */
3250  0, /* tp_str */
3251  0, /* tp_getattro */
3252  0, /* tp_setattro */
3253  0, /* tp_as_buffer */
3254  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3255  encoder_doc, /* tp_doc */
3256  encoder_traverse, /* tp_traverse */
3257  encoder_clear, /* tp_clear */
3258  0, /* tp_richcompare */
3259  0, /* tp_weaklistoffset */
3260  0, /* tp_iter */
3261  0, /* tp_iternext */
3262  0, /* tp_methods */
3263  encoder_members, /* tp_members */
3264  0, /* tp_getset */
3265  0, /* tp_base */
3266  0, /* tp_dict */
3267  0, /* tp_descr_get */
3268  0, /* tp_descr_set */
3269  0, /* tp_dictoffset */
3270  0, /* tp_init */
3271  0, /* tp_alloc */
3272  encoder_new, /* tp_new */
3273  0, /* tp_free */
3274 };
3275 
/*
 * Module-level function table for _speedups.
 *
 * Exposes two functions: "encode_basestring_ascii" (METH_O: called with a
 * single object argument) and "scanstring" (METH_VARARGS: called with a
 * tuple of positional arguments). The all-NULL entry terminates the table.
 */
3276 static PyMethodDef speedups_methods[] = {
3277  {"encode_basestring_ascii",
3278  (PyCFunction)py_encode_basestring_ascii,
3279  METH_O,
3280  pydoc_encode_basestring_ascii},
3281  {"scanstring",
3282  (PyCFunction)py_scanstring,
3283  METH_VARARGS,
3284  pydoc_scanstring},
3285  {NULL, NULL, 0, NULL}
3286 };
3287 
/* Module docstring; used by PyModuleDef on Python 3 and passed to
   Py_InitModule3 on Python 2. */
3288 PyDoc_STRVAR(module_doc,
3289 "simplejson speedups\n");
3290
/*
 * Python 3 only: module definition handed to PyModule_Create in
 * moduleinit(). It names the module "_speedups", attaches module_doc and
 * the speedups_methods function table, and supplies no reload / traverse /
 * clear / free hooks.
 * NOTE(review): m_size of -1 presumably means the module keeps its state in
 * C globals rather than in per-module storage -- confirm against the
 * PyModuleDef documentation.
 */
3291 #if PY_MAJOR_VERSION >= 3
3292 static struct PyModuleDef moduledef = {
3293  PyModuleDef_HEAD_INIT,
3294  "_speedups", /* m_name */
3295  module_doc, /* m_doc */
3296  -1, /* m_size */
3297  speedups_methods, /* m_methods */
3298  NULL, /* m_reload */
3299  NULL, /* m_traverse */
3300  NULL, /* m_clear*/
3301  NULL, /* m_free */
3302 };
3303 #endif
3304 
3305 PyObject *
3306 import_dependency(char *module_name, char *attr_name)
3307 {
3308  PyObject *rval;
3309  PyObject *module = PyImport_ImportModule(module_name);
3310  if (module == NULL)
3311  return NULL;
3312  rval = PyObject_GetAttrString(module, attr_name);
3313  Py_DECREF(module);
3314  return rval;
3315 }
3316 
3317 static int
3319 {
3321  if (JSON_NaN == NULL)
3322  return 0;
3323  JSON_Infinity = JSON_InternFromString("Infinity");
3324  if (JSON_Infinity == NULL)
3325  return 0;
3326  JSON_NegInfinity = JSON_InternFromString("-Infinity");
3327  if (JSON_NegInfinity == NULL)
3328  return 0;
3329 #if PY_MAJOR_VERSION >= 3
3330  JSON_EmptyUnicode = PyUnicode_New(0, 127);
3331 #else /* PY_MAJOR_VERSION >= 3 */
3332  JSON_EmptyStr = PyString_FromString("");
3333  if (JSON_EmptyStr == NULL)
3334  return 0;
3335  JSON_EmptyUnicode = PyUnicode_FromUnicode(NULL, 0);
3336 #endif /* PY_MAJOR_VERSION >= 3 */
3337  if (JSON_EmptyUnicode == NULL)
3338  return 0;
3339 
3340  return 1;
3341 }
3342 
3343 static PyObject *
3345 {
3346  PyObject *m;
3347  if (PyType_Ready(&PyScannerType) < 0)
3348  return NULL;
3349  if (PyType_Ready(&PyEncoderType) < 0)
3350  return NULL;
3351  if (!init_constants())
3352  return NULL;
3353 
3354 #if PY_MAJOR_VERSION >= 3
3355  m = PyModule_Create(&moduledef);
3356 #else
3357  m = Py_InitModule3("_speedups", speedups_methods, module_doc);
3358 #endif
3359  Py_INCREF((PyObject*)&PyScannerType);
3360  PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
3361  Py_INCREF((PyObject*)&PyEncoderType);
3362  PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
3363  RawJSONType = import_dependency("simplejson.raw_json", "RawJSON");
3364  if (RawJSONType == NULL)
3365  return NULL;
3366  JSONDecodeError = import_dependency("simplejson.errors", "JSONDecodeError");
3367  if (JSONDecodeError == NULL)
3368  return NULL;
3369  return m;
3370 }
3371 
/*
 * Interpreter entry points for the extension module.
 *
 * Python 3 looks up PyInit__speedups and expects the module object (or
 * NULL) back; Python 2 looks up init_speedups, which returns nothing.
 * Both delegate to moduleinit().
 */
#if PY_MAJOR_VERSION >= 3
PyMODINIT_FUNC
PyInit__speedups(void)
{
    return moduleinit();
}
#else
void
init_speedups(void)
{
    moduleinit();
}
#endif
encoder_encode_float
static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj)
Definition: _speedups.c:2716
PyEncoderType
static PyTypeObject PyEncoderType
Definition: _speedups.c:84
ascii_char_size
static Py_ssize_t ascii_char_size(JSON_UNICHR c)
Definition: _speedups.c:475
_PyEncoderObject::Decimal
PyObject * Decimal
Definition: _speedups.c:147
py_encode_basestring_ascii
static PyObject * py_encode_basestring_ascii(PyObject *self UNUSED, PyObject *pystr)
Definition: _speedups.c:1290
PyEncoderObject
struct _PyEncoderObject PyEncoderObject
PyVarObject_HEAD_INIT
#define PyVarObject_HEAD_INIT(type, size)
Definition: _speedups.c:55
_PyEncoderObject::use_decimal
int use_decimal
Definition: _speedups.c:153
_PyEncoderObject::item_separator
PyObject * item_separator
Definition: _speedups.c:143
_build_rval_index_tuple
static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
Definition: _speedups.c:800
JSON_InternFromString
#define JSON_InternFromString
Definition: _speedups.c:25
flush_accumulator
static int flush_accumulator(JSON_Accu *acc)
Definition: _speedups.c:279
JSON_Accu
Definition: _speedups.c:86
_PyEncoderObject::encoder
PyObject * encoder
Definition: _speedups.c:140
PyUnicode_KIND
#define PyUnicode_KIND(obj)
Definition: _speedups.c:20
_convertPyInt_FromSsize_t
static PyObject * _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
Definition: _speedups.c:424
_is_namedtuple
static int _is_namedtuple(PyObject *obj)
Definition: _speedups.c:386
_PyEncoderObject::iterable_as_array
int iterable_as_array
Definition: _speedups.c:156
join_list_unicode
static PyObject * join_list_unicode(PyObject *lst)
Definition: _speedups.c:776
maybe_quote_bigint
static PyObject * maybe_quote_bigint(PyEncoderObject *s, PyObject *encoded, PyObject *obj)
Definition: _speedups.c:366
_PyScannerObject::memo
PyObject * memo
Definition: _speedups.c:122
_PyEncoderObject::sort_keys
PyObject * sort_keys
Definition: _speedups.c:144
_PyEncoderObject::fast_encode
int fast_encode
Definition: _speedups.c:150
scanner_traverse
static int scanner_traverse(PyObject *self, visitproc visit, void *arg)
Definition: _speedups.c:1320
_PyEncoderObject::min_long_size
PyObject * min_long_size
Definition: _speedups.c:158
_PyEncoderObject::indent
PyObject * indent
Definition: _speedups.c:141
PyEncoder_Check
#define PyEncoder_Check(op)
Definition: _speedups.c:69
encoder_listencode_list
static int encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level)
Definition: _speedups.c:3081
ERR_STRING_CONTROL
#define ERR_STRING_CONTROL
Definition: _speedups.c:108
_PyEncoderObject::skipkeys
int skipkeys
Definition: _speedups.c:149
_PyEncoderObject::skipkeys_bool
PyObject * skipkeys_bool
Definition: _speedups.c:148
Py_SIZE
#define Py_SIZE(ob)
Definition: _speedups.c:52
PyUnicode_READY
#define PyUnicode_READY(obj)
Definition: _speedups.c:19
ERR_STRING_ESC4
#define ERR_STRING_ESC4
Definition: _speedups.c:110
IS_WHITESPACE
#define IS_WHITESPACE(c)
Definition: _speedups.c:256
json_PyOS_string_to_double
static double json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
Definition: _speedups.c:34
_parse_object_str
static PyObject * _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1355
PyBytes_Check
#define PyBytes_Check
Definition: _speedups.c:18
_PyEncoderObject::defaultfn
PyObject * defaultfn
Definition: _speedups.c:139
raise_errmsg
static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
Definition: _speedups.c:766
scanner_dealloc
static void scanner_dealloc(PyObject *self)
Definition: _speedups.c:1311
IS_DIGIT
static int IS_DIGIT(JSON_UNICHR c)
Definition: _speedups.c:360
_PyScannerObject::parse_float
PyObject * parse_float
Definition: _speedups.c:119
moduleinit
static PyObject * moduleinit(void)
Definition: _speedups.c:3344
_PyScannerObject
Definition: _speedups.c:112
encoder_call
static PyObject * encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
Definition: _speedups.c:2659
_parse_array_str
static PyObject * _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1680
JSON_UNICHR
#define JSON_UNICHR
Definition: _speedups.c:24
encoder_traverse
static int encoder_traverse(PyObject *self, visitproc visit, void *arg)
Definition: _speedups.c:3184
speedups_methods
static PyMethodDef speedups_methods[]
Definition: _speedups.c:3276
ERR_OBJECT_PROPERTY
#define ERR_OBJECT_PROPERTY
Definition: _speedups.c:104
_PyScannerObject::object_hook
PyObject * object_hook
Definition: _speedups.c:117
PyUnicode_GET_LENGTH
#define PyUnicode_GET_LENGTH
Definition: _speedups.c:23
ERR_ARRAY_DELIMITER
#define ERR_ARRAY_DELIMITER
Definition: _speedups.c:101
ERR_STRING_UNTERMINATED
#define ERR_STRING_UNTERMINATED
Definition: _speedups.c:107
aestate.start
def start()
Definition: __init__.py:8
scanner_members
static PyMemberDef scanner_members[]
Definition: _speedups.c:125
scanner_call
static PyObject * scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
Definition: _speedups.c:2318
ERR_OBJECT_DELIMITER
#define ERR_OBJECT_DELIMITER
Definition: _speedups.c:103
_PyEncoderObject::for_json
int for_json
Definition: _speedups.c:161
py_scanstring
static PyObject * py_scanstring(PyObject *self UNUSED, PyObject *args)
Definition: _speedups.c:1248
_PyEncoderObject::item_sort_key
PyObject * item_sort_key
Definition: _speedups.c:159
_PyEncoderObject::item_sort_kw
PyObject * item_sort_kw
Definition: _speedups.c:160
join_list_string
static PyObject * join_list_string(PyObject *lst)
Definition: _speedups.c:786
ERR_ARRAY_VALUE_FIRST
#define ERR_ARRAY_VALUE_FIRST
Definition: _speedups.c:102
is_raw_json
static int is_raw_json(PyObject *obj)
Definition: _speedups.c:262
PyScannerType
static PyTypeObject PyScannerType
Definition: _speedups.c:83
MIN_EXPANSION
#define MIN_EXPANSION
Definition: _speedups.c:258
JSON_Accu_Accumulate
static int JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode)
Definition: _speedups.c:309
JSON_NaN
static PyObject * JSON_NaN
Definition: _speedups.c:77
scanstring_unicode
static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Definition: _speedups.c:1051
init_constants
static int init_constants(void)
Definition: _speedups.c:3318
_PyScannerObject::strict
int strict
Definition: _speedups.c:116
_PyScannerObject::strict_bool
PyObject * strict_bool
Definition: _speedups.c:115
PyUnicode_READ
#define PyUnicode_READ(kind, data, index)
Definition: _speedups.c:22
_PyEncoderObject::markers
PyObject_HEAD PyObject * markers
Definition: _speedups.c:138
encoder_listencode_dict
static int encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level)
Definition: _speedups.c:2942
_PyScannerObject::parse_int
PyObject * parse_int
Definition: _speedups.c:120
encoder_encode_string
static PyObject * encoder_encode_string(PyEncoderObject *s, PyObject *obj)
Definition: _speedups.c:2760
scan_once_unicode
static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:2186
JSON_Accu_Destroy
static void JSON_Accu_Destroy(JSON_Accu *acc)
Definition: _speedups.c:353
_PyEncoderObject::encoding
PyObject * encoding
Definition: _speedups.c:146
_PyEncoderObject::allow_or_ignore_nan
int allow_or_ignore_nan
Definition: _speedups.c:152
ERR_OBJECT_PROPERTY_FIRST
#define ERR_OBJECT_PROPERTY_FIRST
Definition: _speedups.c:105
_PyEncoderObject::key_memo
PyObject * key_memo
Definition: _speedups.c:145
encoder_listencode_obj
static int encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
Definition: _speedups.c:2794
ERR_STRING_ESC1
#define ERR_STRING_ESC1
Definition: _speedups.c:109
RawJSONType
static PyObject * RawJSONType
Definition: _speedups.c:260
_has_for_json_hook
static int _has_for_json_hook(PyObject *obj)
Definition: _speedups.c:400
encoder_stringify_key
static PyObject * encoder_stringify_key(PyEncoderObject *s, PyObject *key)
Definition: _speedups.c:606
_parse_array_unicode
static PyObject * _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1760
JSON_Accu_Init
static int JSON_Accu_Init(JSON_Accu *acc)
Definition: _speedups.c:268
JSONDecodeError
static PyObject * JSONDecodeError
Definition: _speedups.c:764
_PyEncoderObject::key_separator
PyObject * key_separator
Definition: _speedups.c:142
_PyScannerObject::encoding
PyObject_HEAD PyObject * encoding
Definition: _speedups.c:114
APPEND_OLD_CHUNK
#define APPEND_OLD_CHUNK
Definition: _speedups.c:828
scanner_clear
static int scanner_clear(PyObject *self)
Definition: _speedups.c:1337
S_CHAR
#define S_CHAR(c)
Definition: _speedups.c:255
Py_TYPE
#define Py_TYPE(ob)
Definition: _speedups.c:49
scanner_new
static PyObject * scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Definition: _speedups.c:2378
encoder_dealloc
static void encoder_dealloc(PyObject *self)
Definition: _speedups.c:3175
PyScannerObject
struct _PyScannerObject PyScannerObject
JSON_IGNORE_NAN
#define JSON_IGNORE_NAN
Definition: _speedups.c:73
UNUSED
#define UNUSED
Definition: _speedups.c:62
JSON_Accu::large_strings
PyObject * large_strings
Definition: _speedups.c:87
_parse_object_unicode
static PyObject * _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1516
JSON_EmptyStr
static PyObject * JSON_EmptyStr
Definition: _speedups.c:80
PY2_UNUSED
#define PY2_UNUSED
Definition: _speedups.c:16
DEFAULT_ENCODING
#define DEFAULT_ENCODING
Definition: _speedups.c:65
_PyEncoderObject::tuple_as_array
int tuple_as_array
Definition: _speedups.c:155
JSON_Infinity
static PyObject * JSON_Infinity
Definition: _speedups.c:75
ascii_escape_str
static PyObject * ascii_escape_str(PyObject *pystr)
Definition: _speedups.c:557
encoder_clear
static int encoder_clear(PyObject *self)
Definition: _speedups.c:3207
JSON_ALLOW_NAN
#define JSON_ALLOW_NAN
Definition: _speedups.c:72
ascii_escape_unicode
static PyObject * ascii_escape_unicode(PyObject *pystr)
Definition: _speedups.c:500
init_speedups
void init_speedups(void)
Definition: _speedups.c:3380
ascii_escape_char
static Py_ssize_t ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars)
Definition: _speedups.c:431
PyOS_string_to_double
#define PyOS_string_to_double
Definition: _speedups.c:30
ERR_EXPECTING_VALUE
#define ERR_EXPECTING_VALUE
Definition: _speedups.c:100
JSON_EmptyUnicode
static PyObject * JSON_EmptyUnicode
Definition: _speedups.c:78
_match_number_str
static PyObject * _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1862
import_dependency
PyObject * import_dependency(char *module_name, char *attr_name)
Definition: _speedups.c:3306
ERR_OBJECT_PROPERTY_DELIMITER
#define ERR_OBJECT_PROPERTY_DELIMITER
Definition: _speedups.c:106
_steal_accumulate
static int _steal_accumulate(JSON_Accu *accu, PyObject *stolen)
Definition: _speedups.c:2785
_PyEncoderObject::namedtuple_as_object
int namedtuple_as_object
Definition: _speedups.c:154
JSON_Accu_FinishAsList
static PyObject * JSON_Accu_FinishAsList(JSON_Accu *acc)
Definition: _speedups.c:334
_PyEncoderObject::max_long_size
PyObject * max_long_size
Definition: _speedups.c:157
_convertPyInt_AsSsize_t
static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
Definition: _speedups.c:414
PyUnicode_DATA
#define PyUnicode_DATA(obj)
Definition: _speedups.c:21
encoder_new
static PyObject * encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Definition: _speedups.c:2483
PyDoc_STRVAR
PyDoc_STRVAR(pydoc_scanstring, "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" "\n" "Scan the string s for a JSON string. End is the index of the\n" "character in s after the quote that started the JSON string.\n" "Unescapes all valid JSON string escape sequences and raises ValueError\n" "on attempt to decode an invalid string. If strict is False then literal\n" "control characters are allowed in the string.\n" "\n" "Returns a tuple of the decoded string and the index of the character in s\n" "after the end quote.")
scan_once_str
static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:2079
JSON_ParseEncoding
static PyObject * JSON_ParseEncoding(PyObject *encoding)
Definition: _speedups.c:2353
_encoded_const
static PyObject * _encoded_const(PyObject *obj)
Definition: _speedups.c:2682
_PyScannerObject::pairs_hook
PyObject * pairs_hook
Definition: _speedups.c:118
encoder_dict_iteritems
static PyObject * encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct)
Definition: _speedups.c:667
PyScanner_Check
#define PyScanner_Check(op)
Definition: _speedups.c:67
JSON_NegInfinity
static PyObject * JSON_NegInfinity
Definition: _speedups.c:76
_parse_constant
static PyObject * _parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1840
scanstring_str
static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
Definition: _speedups.c:844
JSON_Accu::small_strings
PyObject * small_strings
Definition: _speedups.c:88
_PyScannerObject::parse_constant
PyObject * parse_constant
Definition: _speedups.c:121
_match_number_unicode
static PyObject * _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
Definition: _speedups.c:1967
_PyEncoderObject
Definition: _speedups.c:136
encoder_members
static PyMemberDef encoder_members[]
Definition: _speedups.c:164