Fix localization.
[clinton/xbmc-groove.git] / resources / lib / simplejson / _speedups.c
1 #include "Python.h"
2 #include "structmember.h"
/* Compatibility shims for older CPython headers. */

/* Supply Py_TYPE() when PY_VERSION_HEX is below 0x02060000 and the headers
   do not already provide it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#  define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Supply Py_ssize_t and its helpers when PY_VERSION_HEX is below 0x02050000
   and the headers do not already provide them. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#  define PY_SSIZE_T_MAX INT_MAX
#  define PY_SSIZE_T_MIN INT_MIN
#  define PyInt_FromSsize_t PyInt_FromLong
#  define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Fallback finiteness test: neither infinite nor NaN. */
#if !defined(Py_IS_FINITE)
#  define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a parameter as intentionally unused; it expands to nothing
   on compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
22
23 #define DEFAULT_ENCODING "utf-8"
24
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
32
33 typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
38 PyObject *parse_float;
39 PyObject *parse_int;
40 PyObject *parse_constant;
41 } PyScannerObject;
42
43 static PyMemberDef scanner_members[] = {
44 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
45 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
46 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
47 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
48 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
49 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
50 {NULL}
51 };
52
53 typedef struct _PyEncoderObject {
54 PyObject_HEAD
55 PyObject *markers;
56 PyObject *defaultfn;
57 PyObject *encoder;
58 PyObject *indent;
59 PyObject *key_separator;
60 PyObject *item_separator;
61 PyObject *sort_keys;
62 PyObject *skipkeys;
63 int fast_encode;
64 int allow_nan;
65 } PyEncoderObject;
66
67 static PyMemberDef encoder_members[] = {
68 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
69 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
70 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
71 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
72 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
73 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
74 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
75 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
76 {NULL}
77 };
78
79 static Py_ssize_t
80 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
81 static PyObject *
82 ascii_escape_unicode(PyObject *pystr);
83 static PyObject *
84 ascii_escape_str(PyObject *pystr);
85 static PyObject *
86 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
87 void init_speedups(void);
88 static PyObject *
89 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
90 static PyObject *
91 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
94 static PyObject *
95 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
96 static int
97 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
98 static void
99 scanner_dealloc(PyObject *self);
100 static int
101 scanner_clear(PyObject *self);
102 static PyObject *
103 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
104 static int
105 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
106 static void
107 encoder_dealloc(PyObject *self);
108 static int
109 encoder_clear(PyObject *self);
110 static int
111 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
112 static int
113 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
116 static PyObject *
117 _encoded_const(PyObject *const);
118 static void
119 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
120 static PyObject *
121 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
122 static int
123 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
124 static PyObject *
125 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
126 static PyObject *
127 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
128
/* True when c can be copied into JSON output unescaped: printable ASCII
   (' ' through '~') other than backslash and double quote. The argument is
   parenthesized so that expression arguments are evaluated as a unit. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* True for the four whitespace characters skipped by the parsers here:
   space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes produced by one escaped input character: "\uXXXX" is 6;
   when Py_UNICODE_WIDE is defined a character may become a surrogate pair
   "\uXXXX\uXXXX", which is twice that. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
138
139 static int
140 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
141 {
142 /* PyObject to Py_ssize_t converter */
143 *size_ptr = PyInt_AsSsize_t(o);
144 if (*size_ptr == -1 && PyErr_Occurred());
145 return 1;
146 return 0;
147 }
148
149 static PyObject *
150 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
151 {
152 /* Py_ssize_t to PyObject converter */
153 return PyInt_FromSsize_t(*size_ptr);
154 }
155
156 static Py_ssize_t
157 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
158 {
159 /* Escape unicode code point c to ASCII escape sequences
160 in char *output. output must have at least 12 bytes unused to
161 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
162 output[chars++] = '\\';
163 switch (c) {
164 case '\\': output[chars++] = (char)c; break;
165 case '"': output[chars++] = (char)c; break;
166 case '\b': output[chars++] = 'b'; break;
167 case '\f': output[chars++] = 'f'; break;
168 case '\n': output[chars++] = 'n'; break;
169 case '\r': output[chars++] = 'r'; break;
170 case '\t': output[chars++] = 't'; break;
171 default:
172 #ifdef Py_UNICODE_WIDE
173 if (c >= 0x10000) {
174 /* UTF-16 surrogate pair */
175 Py_UNICODE v = c - 0x10000;
176 c = 0xd800 | ((v >> 10) & 0x3ff);
177 output[chars++] = 'u';
178 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
179 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
180 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
182 c = 0xdc00 | (v & 0x3ff);
183 output[chars++] = '\\';
184 }
185 #endif
186 output[chars++] = 'u';
187 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
188 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
189 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
191 }
192 return chars;
193 }
194
195 static PyObject *
196 ascii_escape_unicode(PyObject *pystr)
197 {
198 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
199 Py_ssize_t i;
200 Py_ssize_t input_chars;
201 Py_ssize_t output_size;
202 Py_ssize_t max_output_size;
203 Py_ssize_t chars;
204 PyObject *rval;
205 char *output;
206 Py_UNICODE *input_unicode;
207
208 input_chars = PyUnicode_GET_SIZE(pystr);
209 input_unicode = PyUnicode_AS_UNICODE(pystr);
210
211 /* One char input can be up to 6 chars output, estimate 4 of these */
212 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
213 max_output_size = 2 + (input_chars * MAX_EXPANSION);
214 rval = PyString_FromStringAndSize(NULL, output_size);
215 if (rval == NULL) {
216 return NULL;
217 }
218 output = PyString_AS_STRING(rval);
219 chars = 0;
220 output[chars++] = '"';
221 for (i = 0; i < input_chars; i++) {
222 Py_UNICODE c = input_unicode[i];
223 if (S_CHAR(c)) {
224 output[chars++] = (char)c;
225 }
226 else {
227 chars = ascii_escape_char(c, output, chars);
228 }
229 if (output_size - chars < (1 + MAX_EXPANSION)) {
230 /* There's more than four, so let's resize by a lot */
231 Py_ssize_t new_output_size = output_size * 2;
232 /* This is an upper bound */
233 if (new_output_size > max_output_size) {
234 new_output_size = max_output_size;
235 }
236 /* Make sure that the output size changed before resizing */
237 if (new_output_size != output_size) {
238 output_size = new_output_size;
239 if (_PyString_Resize(&rval, output_size) == -1) {
240 return NULL;
241 }
242 output = PyString_AS_STRING(rval);
243 }
244 }
245 }
246 output[chars++] = '"';
247 if (_PyString_Resize(&rval, chars) == -1) {
248 return NULL;
249 }
250 return rval;
251 }
252
253 static PyObject *
254 ascii_escape_str(PyObject *pystr)
255 {
256 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
257 Py_ssize_t i;
258 Py_ssize_t input_chars;
259 Py_ssize_t output_size;
260 Py_ssize_t chars;
261 PyObject *rval;
262 char *output;
263 char *input_str;
264
265 input_chars = PyString_GET_SIZE(pystr);
266 input_str = PyString_AS_STRING(pystr);
267
268 /* Fast path for a string that's already ASCII */
269 for (i = 0; i < input_chars; i++) {
270 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
271 if (!S_CHAR(c)) {
272 /* If we have to escape something, scan the string for unicode */
273 Py_ssize_t j;
274 for (j = i; j < input_chars; j++) {
275 c = (Py_UNICODE)(unsigned char)input_str[j];
276 if (c > 0x7f) {
277 /* We hit a non-ASCII character, bail to unicode mode */
278 PyObject *uni;
279 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
280 if (uni == NULL) {
281 return NULL;
282 }
283 rval = ascii_escape_unicode(uni);
284 Py_DECREF(uni);
285 return rval;
286 }
287 }
288 break;
289 }
290 }
291
292 if (i == input_chars) {
293 /* Input is already ASCII */
294 output_size = 2 + input_chars;
295 }
296 else {
297 /* One char input can be up to 6 chars output, estimate 4 of these */
298 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
299 }
300 rval = PyString_FromStringAndSize(NULL, output_size);
301 if (rval == NULL) {
302 return NULL;
303 }
304 output = PyString_AS_STRING(rval);
305 output[0] = '"';
306
307 /* We know that everything up to i is ASCII already */
308 chars = i + 1;
309 memcpy(&output[1], input_str, i);
310
311 for (; i < input_chars; i++) {
312 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
313 if (S_CHAR(c)) {
314 output[chars++] = (char)c;
315 }
316 else {
317 chars = ascii_escape_char(c, output, chars);
318 }
319 /* An ASCII char can't possibly expand to a surrogate! */
320 if (output_size - chars < (1 + MIN_EXPANSION)) {
321 /* There's more than four, so let's resize by a lot */
322 output_size *= 2;
323 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
324 output_size = 2 + (input_chars * MIN_EXPANSION);
325 }
326 if (_PyString_Resize(&rval, output_size) == -1) {
327 return NULL;
328 }
329 output = PyString_AS_STRING(rval);
330 }
331 }
332 output[chars++] = '"';
333 if (_PyString_Resize(&rval, chars) == -1) {
334 return NULL;
335 }
336 return rval;
337 }
338
339 static void
340 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
341 {
342 /* Use the Python function simplejson.decoder.errmsg to raise a nice
343 looking ValueError exception */
344 static PyObject *errmsg_fn = NULL;
345 PyObject *pymsg;
346 if (errmsg_fn == NULL) {
347 PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
348 if (decoder == NULL)
349 return;
350 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
351 Py_DECREF(decoder);
352 if (errmsg_fn == NULL)
353 return;
354 }
355 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
356 if (pymsg) {
357 PyErr_SetObject(PyExc_ValueError, pymsg);
358 Py_DECREF(pymsg);
359 }
360 }
361
362 static PyObject *
363 join_list_unicode(PyObject *lst)
364 {
365 /* return u''.join(lst) */
366 static PyObject *joinfn = NULL;
367 if (joinfn == NULL) {
368 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
369 if (ustr == NULL)
370 return NULL;
371
372 joinfn = PyObject_GetAttrString(ustr, "join");
373 Py_DECREF(ustr);
374 if (joinfn == NULL)
375 return NULL;
376 }
377 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
378 }
379
380 static PyObject *
381 join_list_string(PyObject *lst)
382 {
383 /* return ''.join(lst) */
384 static PyObject *joinfn = NULL;
385 if (joinfn == NULL) {
386 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
387 if (ustr == NULL)
388 return NULL;
389
390 joinfn = PyObject_GetAttrString(ustr, "join");
391 Py_DECREF(ustr);
392 if (joinfn == NULL)
393 return NULL;
394 }
395 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
396 }
397
398 static PyObject *
399 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
400 /* return (rval, idx) tuple, stealing reference to rval */
401 PyObject *tpl;
402 PyObject *pyidx;
403 /*
404 steal a reference to rval, returns (rval, idx)
405 */
406 if (rval == NULL) {
407 return NULL;
408 }
409 pyidx = PyInt_FromSsize_t(idx);
410 if (pyidx == NULL) {
411 Py_DECREF(rval);
412 return NULL;
413 }
414 tpl = PyTuple_New(2);
415 if (tpl == NULL) {
416 Py_DECREF(pyidx);
417 Py_DECREF(rval);
418 return NULL;
419 }
420 PyTuple_SET_ITEM(tpl, 0, rval);
421 PyTuple_SET_ITEM(tpl, 1, pyidx);
422 return tpl;
423 }
424
425 static PyObject *
426 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
427 {
428 /* Read the JSON string from PyString pystr.
429 end is the index of the first character after the quote.
430 encoding is the encoding of pystr (must be an ASCII superset)
431 if strict is zero then literal control characters are allowed
432 *next_end_ptr is a return-by-reference index of the character
433 after the end quote
434
435 Return value is a new PyString (if ASCII-only) or PyUnicode
436 */
437 PyObject *rval;
438 Py_ssize_t len = PyString_GET_SIZE(pystr);
439 Py_ssize_t begin = end - 1;
440 Py_ssize_t next = begin;
441 int has_unicode = 0;
442 char *buf = PyString_AS_STRING(pystr);
443 PyObject *chunks = PyList_New(0);
444 if (chunks == NULL) {
445 goto bail;
446 }
447 if (end < 0 || len <= end) {
448 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
449 goto bail;
450 }
451 while (1) {
452 /* Find the end of the string or the next escape */
453 Py_UNICODE c = 0;
454 PyObject *chunk = NULL;
455 for (next = end; next < len; next++) {
456 c = (unsigned char)buf[next];
457 if (c == '"' || c == '\\') {
458 break;
459 }
460 else if (strict && c <= 0x1f) {
461 raise_errmsg("Invalid control character at", pystr, next);
462 goto bail;
463 }
464 else if (c > 0x7f) {
465 has_unicode = 1;
466 }
467 }
468 if (!(c == '"' || c == '\\')) {
469 raise_errmsg("Unterminated string starting at", pystr, begin);
470 goto bail;
471 }
472 /* Pick up this chunk if it's not zero length */
473 if (next != end) {
474 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
475 if (strchunk == NULL) {
476 goto bail;
477 }
478 if (has_unicode) {
479 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
480 Py_DECREF(strchunk);
481 if (chunk == NULL) {
482 goto bail;
483 }
484 }
485 else {
486 chunk = strchunk;
487 }
488 if (PyList_Append(chunks, chunk)) {
489 Py_DECREF(chunk);
490 goto bail;
491 }
492 Py_DECREF(chunk);
493 }
494 next++;
495 if (c == '"') {
496 end = next;
497 break;
498 }
499 if (next == len) {
500 raise_errmsg("Unterminated string starting at", pystr, begin);
501 goto bail;
502 }
503 c = buf[next];
504 if (c != 'u') {
505 /* Non-unicode backslash escapes */
506 end = next + 1;
507 switch (c) {
508 case '"': break;
509 case '\\': break;
510 case '/': break;
511 case 'b': c = '\b'; break;
512 case 'f': c = '\f'; break;
513 case 'n': c = '\n'; break;
514 case 'r': c = '\r'; break;
515 case 't': c = '\t'; break;
516 default: c = 0;
517 }
518 if (c == 0) {
519 raise_errmsg("Invalid \\escape", pystr, end - 2);
520 goto bail;
521 }
522 }
523 else {
524 c = 0;
525 next++;
526 end = next + 4;
527 if (end >= len) {
528 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
529 goto bail;
530 }
531 /* Decode 4 hex digits */
532 for (; next < end; next++) {
533 Py_UNICODE digit = buf[next];
534 c <<= 4;
535 switch (digit) {
536 case '0': case '1': case '2': case '3': case '4':
537 case '5': case '6': case '7': case '8': case '9':
538 c |= (digit - '0'); break;
539 case 'a': case 'b': case 'c': case 'd': case 'e':
540 case 'f':
541 c |= (digit - 'a' + 10); break;
542 case 'A': case 'B': case 'C': case 'D': case 'E':
543 case 'F':
544 c |= (digit - 'A' + 10); break;
545 default:
546 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
547 goto bail;
548 }
549 }
550 #ifdef Py_UNICODE_WIDE
551 /* Surrogate pair */
552 if ((c & 0xfc00) == 0xd800) {
553 Py_UNICODE c2 = 0;
554 if (end + 6 >= len) {
555 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
556 goto bail;
557 }
558 if (buf[next++] != '\\' || buf[next++] != 'u') {
559 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
560 goto bail;
561 }
562 end += 6;
563 /* Decode 4 hex digits */
564 for (; next < end; next++) {
565 c2 <<= 4;
566 Py_UNICODE digit = buf[next];
567 switch (digit) {
568 case '0': case '1': case '2': case '3': case '4':
569 case '5': case '6': case '7': case '8': case '9':
570 c2 |= (digit - '0'); break;
571 case 'a': case 'b': case 'c': case 'd': case 'e':
572 case 'f':
573 c2 |= (digit - 'a' + 10); break;
574 case 'A': case 'B': case 'C': case 'D': case 'E':
575 case 'F':
576 c2 |= (digit - 'A' + 10); break;
577 default:
578 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
579 goto bail;
580 }
581 }
582 if ((c2 & 0xfc00) != 0xdc00) {
583 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
584 goto bail;
585 }
586 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
587 }
588 else if ((c & 0xfc00) == 0xdc00) {
589 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
590 goto bail;
591 }
592 #endif
593 }
594 if (c > 0x7f) {
595 has_unicode = 1;
596 }
597 if (has_unicode) {
598 chunk = PyUnicode_FromUnicode(&c, 1);
599 if (chunk == NULL) {
600 goto bail;
601 }
602 }
603 else {
604 char c_char = Py_CHARMASK(c);
605 chunk = PyString_FromStringAndSize(&c_char, 1);
606 if (chunk == NULL) {
607 goto bail;
608 }
609 }
610 if (PyList_Append(chunks, chunk)) {
611 Py_DECREF(chunk);
612 goto bail;
613 }
614 Py_DECREF(chunk);
615 }
616
617 rval = join_list_string(chunks);
618 if (rval == NULL) {
619 goto bail;
620 }
621 Py_CLEAR(chunks);
622 *next_end_ptr = end;
623 return rval;
624 bail:
625 *next_end_ptr = -1;
626 Py_XDECREF(chunks);
627 return NULL;
628 }
629
630
631 static PyObject *
632 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
633 {
634 /* Read the JSON string from PyUnicode pystr.
635 end is the index of the first character after the quote.
636 if strict is zero then literal control characters are allowed
637 *next_end_ptr is a return-by-reference index of the character
638 after the end quote
639
640 Return value is a new PyUnicode
641 */
642 PyObject *rval;
643 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
644 Py_ssize_t begin = end - 1;
645 Py_ssize_t next = begin;
646 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
647 PyObject *chunks = PyList_New(0);
648 if (chunks == NULL) {
649 goto bail;
650 }
651 if (end < 0 || len <= end) {
652 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
653 goto bail;
654 }
655 while (1) {
656 /* Find the end of the string or the next escape */
657 Py_UNICODE c = 0;
658 PyObject *chunk = NULL;
659 for (next = end; next < len; next++) {
660 c = buf[next];
661 if (c == '"' || c == '\\') {
662 break;
663 }
664 else if (strict && c <= 0x1f) {
665 raise_errmsg("Invalid control character at", pystr, next);
666 goto bail;
667 }
668 }
669 if (!(c == '"' || c == '\\')) {
670 raise_errmsg("Unterminated string starting at", pystr, begin);
671 goto bail;
672 }
673 /* Pick up this chunk if it's not zero length */
674 if (next != end) {
675 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
676 if (chunk == NULL) {
677 goto bail;
678 }
679 if (PyList_Append(chunks, chunk)) {
680 Py_DECREF(chunk);
681 goto bail;
682 }
683 Py_DECREF(chunk);
684 }
685 next++;
686 if (c == '"') {
687 end = next;
688 break;
689 }
690 if (next == len) {
691 raise_errmsg("Unterminated string starting at", pystr, begin);
692 goto bail;
693 }
694 c = buf[next];
695 if (c != 'u') {
696 /* Non-unicode backslash escapes */
697 end = next + 1;
698 switch (c) {
699 case '"': break;
700 case '\\': break;
701 case '/': break;
702 case 'b': c = '\b'; break;
703 case 'f': c = '\f'; break;
704 case 'n': c = '\n'; break;
705 case 'r': c = '\r'; break;
706 case 't': c = '\t'; break;
707 default: c = 0;
708 }
709 if (c == 0) {
710 raise_errmsg("Invalid \\escape", pystr, end - 2);
711 goto bail;
712 }
713 }
714 else {
715 c = 0;
716 next++;
717 end = next + 4;
718 if (end >= len) {
719 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
720 goto bail;
721 }
722 /* Decode 4 hex digits */
723 for (; next < end; next++) {
724 Py_UNICODE digit = buf[next];
725 c <<= 4;
726 switch (digit) {
727 case '0': case '1': case '2': case '3': case '4':
728 case '5': case '6': case '7': case '8': case '9':
729 c |= (digit - '0'); break;
730 case 'a': case 'b': case 'c': case 'd': case 'e':
731 case 'f':
732 c |= (digit - 'a' + 10); break;
733 case 'A': case 'B': case 'C': case 'D': case 'E':
734 case 'F':
735 c |= (digit - 'A' + 10); break;
736 default:
737 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
738 goto bail;
739 }
740 }
741 #ifdef Py_UNICODE_WIDE
742 /* Surrogate pair */
743 if ((c & 0xfc00) == 0xd800) {
744 Py_UNICODE c2 = 0;
745 if (end + 6 >= len) {
746 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
747 goto bail;
748 }
749 if (buf[next++] != '\\' || buf[next++] != 'u') {
750 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
751 goto bail;
752 }
753 end += 6;
754 /* Decode 4 hex digits */
755 for (; next < end; next++) {
756 c2 <<= 4;
757 Py_UNICODE digit = buf[next];
758 switch (digit) {
759 case '0': case '1': case '2': case '3': case '4':
760 case '5': case '6': case '7': case '8': case '9':
761 c2 |= (digit - '0'); break;
762 case 'a': case 'b': case 'c': case 'd': case 'e':
763 case 'f':
764 c2 |= (digit - 'a' + 10); break;
765 case 'A': case 'B': case 'C': case 'D': case 'E':
766 case 'F':
767 c2 |= (digit - 'A' + 10); break;
768 default:
769 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
770 goto bail;
771 }
772 }
773 if ((c2 & 0xfc00) != 0xdc00) {
774 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
775 goto bail;
776 }
777 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
778 }
779 else if ((c & 0xfc00) == 0xdc00) {
780 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
781 goto bail;
782 }
783 #endif
784 }
785 chunk = PyUnicode_FromUnicode(&c, 1);
786 if (chunk == NULL) {
787 goto bail;
788 }
789 if (PyList_Append(chunks, chunk)) {
790 Py_DECREF(chunk);
791 goto bail;
792 }
793 Py_DECREF(chunk);
794 }
795
796 rval = join_list_unicode(chunks);
797 if (rval == NULL) {
798 goto bail;
799 }
800 Py_DECREF(chunks);
801 *next_end_ptr = end;
802 return rval;
803 bail:
804 *next_end_ptr = -1;
805 Py_XDECREF(chunks);
806 return NULL;
807 }
808
809 PyDoc_STRVAR(pydoc_scanstring,
810 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
811 "\n"
812 "Scan the string s for a JSON string. End is the index of the\n"
813 "character in s after the quote that started the JSON string.\n"
814 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
815 "on attempt to decode an invalid string. If strict is False then literal\n"
816 "control characters are allowed in the string.\n"
817 "\n"
818 "Returns a tuple of the decoded string and the index of the character in s\n"
819 "after the end quote."
820 );
821
822 static PyObject *
823 py_scanstring(PyObject* self UNUSED, PyObject *args)
824 {
825 PyObject *pystr;
826 PyObject *rval;
827 Py_ssize_t end;
828 Py_ssize_t next_end = -1;
829 char *encoding = NULL;
830 int strict = 1;
831 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
832 return NULL;
833 }
834 if (encoding == NULL) {
835 encoding = DEFAULT_ENCODING;
836 }
837 if (PyString_Check(pystr)) {
838 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
839 }
840 else if (PyUnicode_Check(pystr)) {
841 rval = scanstring_unicode(pystr, end, strict, &next_end);
842 }
843 else {
844 PyErr_Format(PyExc_TypeError,
845 "first argument must be a string, not %.80s",
846 Py_TYPE(pystr)->tp_name);
847 return NULL;
848 }
849 return _build_rval_index_tuple(rval, next_end);
850 }
851
852 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
853 "encode_basestring_ascii(basestring) -> str\n"
854 "\n"
855 "Return an ASCII-only JSON representation of a Python string"
856 );
857
858 static PyObject *
859 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
860 {
861 /* Return an ASCII-only JSON representation of a Python string */
862 /* METH_O */
863 if (PyString_Check(pystr)) {
864 return ascii_escape_str(pystr);
865 }
866 else if (PyUnicode_Check(pystr)) {
867 return ascii_escape_unicode(pystr);
868 }
869 else {
870 PyErr_Format(PyExc_TypeError,
871 "first argument must be a string, not %.80s",
872 Py_TYPE(pystr)->tp_name);
873 return NULL;
874 }
875 }
876
877 static void
878 scanner_dealloc(PyObject *self)
879 {
880 /* Deallocate scanner object */
881 scanner_clear(self);
882 Py_TYPE(self)->tp_free(self);
883 }
884
885 static int
886 scanner_traverse(PyObject *self, visitproc visit, void *arg)
887 {
888 PyScannerObject *s;
889 assert(PyScanner_Check(self));
890 s = (PyScannerObject *)self;
891 Py_VISIT(s->encoding);
892 Py_VISIT(s->strict);
893 Py_VISIT(s->object_hook);
894 Py_VISIT(s->parse_float);
895 Py_VISIT(s->parse_int);
896 Py_VISIT(s->parse_constant);
897 return 0;
898 }
899
900 static int
901 scanner_clear(PyObject *self)
902 {
903 PyScannerObject *s;
904 assert(PyScanner_Check(self));
905 s = (PyScannerObject *)self;
906 Py_CLEAR(s->encoding);
907 Py_CLEAR(s->strict);
908 Py_CLEAR(s->object_hook);
909 Py_CLEAR(s->parse_float);
910 Py_CLEAR(s->parse_int);
911 Py_CLEAR(s->parse_constant);
912 return 0;
913 }
914
915 static PyObject *
916 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
917 /* Read a JSON object from PyString pystr.
918 idx is the index of the first character after the opening curly brace.
919 *next_idx_ptr is a return-by-reference index to the first character after
920 the closing curly brace.
921
922 Returns a new PyObject (usually a dict, but object_hook can change that)
923 */
924 char *str = PyString_AS_STRING(pystr);
925 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
926 PyObject *rval = PyDict_New();
927 PyObject *key = NULL;
928 PyObject *val = NULL;
929 char *encoding = PyString_AS_STRING(s->encoding);
930 int strict = PyObject_IsTrue(s->strict);
931 Py_ssize_t next_idx;
932 if (rval == NULL)
933 return NULL;
934
935 /* skip whitespace after { */
936 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
937
938 /* only loop if the object is non-empty */
939 if (idx <= end_idx && str[idx] != '}') {
940 while (idx <= end_idx) {
941 /* read key */
942 if (str[idx] != '"') {
943 raise_errmsg("Expecting property name", pystr, idx);
944 goto bail;
945 }
946 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
947 if (key == NULL)
948 goto bail;
949 idx = next_idx;
950
951 /* skip whitespace between key and : delimiter, read :, skip whitespace */
952 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
953 if (idx > end_idx || str[idx] != ':') {
954 raise_errmsg("Expecting : delimiter", pystr, idx);
955 goto bail;
956 }
957 idx++;
958 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
959
960 /* read any JSON data type */
961 val = scan_once_str(s, pystr, idx, &next_idx);
962 if (val == NULL)
963 goto bail;
964
965 if (PyDict_SetItem(rval, key, val) == -1)
966 goto bail;
967
968 Py_CLEAR(key);
969 Py_CLEAR(val);
970 idx = next_idx;
971
972 /* skip whitespace before } or , */
973 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
974
975 /* bail if the object is closed or we didn't get the , delimiter */
976 if (idx > end_idx) break;
977 if (str[idx] == '}') {
978 break;
979 }
980 else if (str[idx] != ',') {
981 raise_errmsg("Expecting , delimiter", pystr, idx);
982 goto bail;
983 }
984 idx++;
985
986 /* skip whitespace after , delimiter */
987 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
988 }
989 }
990 /* verify that idx < end_idx, str[idx] should be '}' */
991 if (idx > end_idx || str[idx] != '}') {
992 raise_errmsg("Expecting object", pystr, end_idx);
993 goto bail;
994 }
995 /* if object_hook is not None: rval = object_hook(rval) */
996 if (s->object_hook != Py_None) {
997 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
998 if (val == NULL)
999 goto bail;
1000 Py_DECREF(rval);
1001 rval = val;
1002 val = NULL;
1003 }
1004 *next_idx_ptr = idx + 1;
1005 return rval;
1006 bail:
1007 Py_XDECREF(key);
1008 Py_XDECREF(val);
1009 Py_DECREF(rval);
1010 return NULL;
1011 }
1012
1013 static PyObject *
1014 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1015 /* Read a JSON object from PyUnicode pystr.
1016 idx is the index of the first character after the opening curly brace.
1017 *next_idx_ptr is a return-by-reference index to the first character after
1018 the closing curly brace.
1019
1020 Returns a new PyObject (usually a dict, but object_hook can change that)
1021 */
1022 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1023 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1024 PyObject *val = NULL;
1025 PyObject *rval = PyDict_New();
1026 PyObject *key = NULL;
1027 int strict = PyObject_IsTrue(s->strict);
1028 Py_ssize_t next_idx;
1029 if (rval == NULL)
1030 return NULL;
1031
1032 /* skip whitespace after { */
1033 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1034
1035 /* only loop if the object is non-empty */
1036 if (idx <= end_idx && str[idx] != '}') {
1037 while (idx <= end_idx) {
1038 /* read key */
1039 if (str[idx] != '"') {
1040 raise_errmsg("Expecting property name", pystr, idx);
1041 goto bail;
1042 }
1043 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1044 if (key == NULL)
1045 goto bail;
1046 idx = next_idx;
1047
1048 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1049 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1050 if (idx > end_idx || str[idx] != ':') {
1051 raise_errmsg("Expecting : delimiter", pystr, idx);
1052 goto bail;
1053 }
1054 idx++;
1055 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1056
1057 /* read any JSON term */
1058 val = scan_once_unicode(s, pystr, idx, &next_idx);
1059 if (val == NULL)
1060 goto bail;
1061
1062 if (PyDict_SetItem(rval, key, val) == -1)
1063 goto bail;
1064
1065 Py_CLEAR(key);
1066 Py_CLEAR(val);
1067 idx = next_idx;
1068
1069 /* skip whitespace before } or , */
1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1071
1072 /* bail if the object is closed or we didn't get the , delimiter */
1073 if (idx > end_idx) break;
1074 if (str[idx] == '}') {
1075 break;
1076 }
1077 else if (str[idx] != ',') {
1078 raise_errmsg("Expecting , delimiter", pystr, idx);
1079 goto bail;
1080 }
1081 idx++;
1082
1083 /* skip whitespace after , delimiter */
1084 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1085 }
1086 }
1087
1088 /* verify that idx < end_idx, str[idx] should be '}' */
1089 if (idx > end_idx || str[idx] != '}') {
1090 raise_errmsg("Expecting object", pystr, end_idx);
1091 goto bail;
1092 }
1093
1094 /* if object_hook is not None: rval = object_hook(rval) */
1095 if (s->object_hook != Py_None) {
1096 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1097 if (val == NULL)
1098 goto bail;
1099 Py_DECREF(rval);
1100 rval = val;
1101 val = NULL;
1102 }
1103 *next_idx_ptr = idx + 1;
1104 return rval;
1105 bail:
1106 Py_XDECREF(key);
1107 Py_XDECREF(val);
1108 Py_DECREF(rval);
1109 return NULL;
1110 }
1111
1112 static PyObject *
1113 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1114 /* Read a JSON array from PyString pystr.
1115 idx is the index of the first character after the opening brace.
1116 *next_idx_ptr is a return-by-reference index to the first character after
1117 the closing brace.
1118
1119 Returns a new PyList
1120 */
1121 char *str = PyString_AS_STRING(pystr);
1122 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1123 PyObject *val = NULL;
1124 PyObject *rval = PyList_New(0);
1125 Py_ssize_t next_idx;
1126 if (rval == NULL)
1127 return NULL;
1128
1129 /* skip whitespace after [ */
1130 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1131
1132 /* only loop if the array is non-empty */
1133 if (idx <= end_idx && str[idx] != ']') {
1134 while (idx <= end_idx) {
1135
1136 /* read any JSON term and de-tuplefy the (rval, idx) */
1137 val = scan_once_str(s, pystr, idx, &next_idx);
1138 if (val == NULL)
1139 goto bail;
1140
1141 if (PyList_Append(rval, val) == -1)
1142 goto bail;
1143
1144 Py_CLEAR(val);
1145 idx = next_idx;
1146
1147 /* skip whitespace between term and , */
1148 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1149
1150 /* bail if the array is closed or we didn't get the , delimiter */
1151 if (idx > end_idx) break;
1152 if (str[idx] == ']') {
1153 break;
1154 }
1155 else if (str[idx] != ',') {
1156 raise_errmsg("Expecting , delimiter", pystr, idx);
1157 goto bail;
1158 }
1159 idx++;
1160
1161 /* skip whitespace after , */
1162 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1163 }
1164 }
1165
1166 /* verify that idx < end_idx, str[idx] should be ']' */
1167 if (idx > end_idx || str[idx] != ']') {
1168 raise_errmsg("Expecting object", pystr, end_idx);
1169 goto bail;
1170 }
1171 *next_idx_ptr = idx + 1;
1172 return rval;
1173 bail:
1174 Py_XDECREF(val);
1175 Py_DECREF(rval);
1176 return NULL;
1177 }
1178
1179 static PyObject *
1180 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1181 /* Read a JSON array from PyString pystr.
1182 idx is the index of the first character after the opening brace.
1183 *next_idx_ptr is a return-by-reference index to the first character after
1184 the closing brace.
1185
1186 Returns a new PyList
1187 */
1188 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1189 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1190 PyObject *val = NULL;
1191 PyObject *rval = PyList_New(0);
1192 Py_ssize_t next_idx;
1193 if (rval == NULL)
1194 return NULL;
1195
1196 /* skip whitespace after [ */
1197 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1198
1199 /* only loop if the array is non-empty */
1200 if (idx <= end_idx && str[idx] != ']') {
1201 while (idx <= end_idx) {
1202
1203 /* read any JSON term */
1204 val = scan_once_unicode(s, pystr, idx, &next_idx);
1205 if (val == NULL)
1206 goto bail;
1207
1208 if (PyList_Append(rval, val) == -1)
1209 goto bail;
1210
1211 Py_CLEAR(val);
1212 idx = next_idx;
1213
1214 /* skip whitespace between term and , */
1215 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1216
1217 /* bail if the array is closed or we didn't get the , delimiter */
1218 if (idx > end_idx) break;
1219 if (str[idx] == ']') {
1220 break;
1221 }
1222 else if (str[idx] != ',') {
1223 raise_errmsg("Expecting , delimiter", pystr, idx);
1224 goto bail;
1225 }
1226 idx++;
1227
1228 /* skip whitespace after , */
1229 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1230 }
1231 }
1232
1233 /* verify that idx < end_idx, str[idx] should be ']' */
1234 if (idx > end_idx || str[idx] != ']') {
1235 raise_errmsg("Expecting object", pystr, end_idx);
1236 goto bail;
1237 }
1238 *next_idx_ptr = idx + 1;
1239 return rval;
1240 bail:
1241 Py_XDECREF(val);
1242 Py_DECREF(rval);
1243 return NULL;
1244 }
1245
1246 static PyObject *
1247 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1248 /* Read a JSON constant from PyString pystr.
1249 constant is the constant string that was found
1250 ("NaN", "Infinity", "-Infinity").
1251 idx is the index of the first character of the constant
1252 *next_idx_ptr is a return-by-reference index to the first character after
1253 the constant.
1254
1255 Returns the result of parse_constant
1256 */
1257 PyObject *cstr;
1258 PyObject *rval;
1259 /* constant is "NaN", "Infinity", or "-Infinity" */
1260 cstr = PyString_InternFromString(constant);
1261 if (cstr == NULL)
1262 return NULL;
1263
1264 /* rval = parse_constant(constant) */
1265 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1266 idx += PyString_GET_SIZE(cstr);
1267 Py_DECREF(cstr);
1268 *next_idx_ptr = idx;
1269 return rval;
1270 }
1271
1272 static PyObject *
1273 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1274 /* Read a JSON number from PyString pystr.
1275 idx is the index of the first character of the number
1276 *next_idx_ptr is a return-by-reference index to the first character after
1277 the number.
1278
1279 Returns a new PyObject representation of that number:
1280 PyInt, PyLong, or PyFloat.
1281 May return other types if parse_int or parse_float are set
1282 */
1283 char *str = PyString_AS_STRING(pystr);
1284 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1285 Py_ssize_t idx = start;
1286 int is_float = 0;
1287 PyObject *rval;
1288 PyObject *numstr;
1289
1290 /* read a sign if it's there, make sure it's not the end of the string */
1291 if (str[idx] == '-') {
1292 idx++;
1293 if (idx > end_idx) {
1294 PyErr_SetNone(PyExc_StopIteration);
1295 return NULL;
1296 }
1297 }
1298
1299 /* read as many integer digits as we find as long as it doesn't start with 0 */
1300 if (str[idx] >= '1' && str[idx] <= '9') {
1301 idx++;
1302 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1303 }
1304 /* if it starts with 0 we only expect one integer digit */
1305 else if (str[idx] == '0') {
1306 idx++;
1307 }
1308 /* no integer digits, error */
1309 else {
1310 PyErr_SetNone(PyExc_StopIteration);
1311 return NULL;
1312 }
1313
1314 /* if the next char is '.' followed by a digit then read all float digits */
1315 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1316 is_float = 1;
1317 idx += 2;
1318 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1319 }
1320
1321 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1322 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1323
1324 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1325 Py_ssize_t e_start = idx;
1326 idx++;
1327
1328 /* read an exponent sign if present */
1329 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1330
1331 /* read all digits */
1332 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1333
1334 /* if we got a digit, then parse as float. if not, backtrack */
1335 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1336 is_float = 1;
1337 }
1338 else {
1339 idx = e_start;
1340 }
1341 }
1342
1343 /* copy the section we determined to be a number */
1344 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1345 if (numstr == NULL)
1346 return NULL;
1347 if (is_float) {
1348 /* parse as a float using a fast path if available, otherwise call user defined method */
1349 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1350 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1351 }
1352 else {
1353 rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
1354 }
1355 }
1356 else {
1357 /* parse as an int using a fast path if available, otherwise call user defined method */
1358 if (s->parse_int != (PyObject *)&PyInt_Type) {
1359 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1360 }
1361 else {
1362 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1363 }
1364 }
1365 Py_DECREF(numstr);
1366 *next_idx_ptr = idx;
1367 return rval;
1368 }
1369
1370 static PyObject *
1371 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1372 /* Read a JSON number from PyUnicode pystr.
1373 idx is the index of the first character of the number
1374 *next_idx_ptr is a return-by-reference index to the first character after
1375 the number.
1376
1377 Returns a new PyObject representation of that number:
1378 PyInt, PyLong, or PyFloat.
1379 May return other types if parse_int or parse_float are set
1380 */
1381 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1382 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1383 Py_ssize_t idx = start;
1384 int is_float = 0;
1385 PyObject *rval;
1386 PyObject *numstr;
1387
1388 /* read a sign if it's there, make sure it's not the end of the string */
1389 if (str[idx] == '-') {
1390 idx++;
1391 if (idx > end_idx) {
1392 PyErr_SetNone(PyExc_StopIteration);
1393 return NULL;
1394 }
1395 }
1396
1397 /* read as many integer digits as we find as long as it doesn't start with 0 */
1398 if (str[idx] >= '1' && str[idx] <= '9') {
1399 idx++;
1400 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1401 }
1402 /* if it starts with 0 we only expect one integer digit */
1403 else if (str[idx] == '0') {
1404 idx++;
1405 }
1406 /* no integer digits, error */
1407 else {
1408 PyErr_SetNone(PyExc_StopIteration);
1409 return NULL;
1410 }
1411
1412 /* if the next char is '.' followed by a digit then read all float digits */
1413 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1414 is_float = 1;
1415 idx += 2;
1416 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1417 }
1418
1419 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1420 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1421 Py_ssize_t e_start = idx;
1422 idx++;
1423
1424 /* read an exponent sign if present */
1425 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1426
1427 /* read all digits */
1428 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1429
1430 /* if we got a digit, then parse as float. if not, backtrack */
1431 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1432 is_float = 1;
1433 }
1434 else {
1435 idx = e_start;
1436 }
1437 }
1438
1439 /* copy the section we determined to be a number */
1440 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1441 if (numstr == NULL)
1442 return NULL;
1443 if (is_float) {
1444 /* parse as a float using a fast path if available, otherwise call user defined method */
1445 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1446 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1447 }
1448 else {
1449 rval = PyFloat_FromString(numstr, NULL);
1450 }
1451 }
1452 else {
1453 /* no fast path for unicode -> int, just call */
1454 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1455 }
1456 Py_DECREF(numstr);
1457 *next_idx_ptr = idx;
1458 return rval;
1459 }
1460
1461 static PyObject *
1462 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1463 {
1464 /* Read one JSON term (of any kind) from PyString pystr.
1465 idx is the index of the first character of the term
1466 *next_idx_ptr is a return-by-reference index to the first character after
1467 the number.
1468
1469 Returns a new PyObject representation of the term.
1470 */
1471 char *str = PyString_AS_STRING(pystr);
1472 Py_ssize_t length = PyString_GET_SIZE(pystr);
1473 if (idx >= length) {
1474 PyErr_SetNone(PyExc_StopIteration);
1475 return NULL;
1476 }
1477 switch (str[idx]) {
1478 case '"':
1479 /* string */
1480 return scanstring_str(pystr, idx + 1,
1481 PyString_AS_STRING(s->encoding),
1482 PyObject_IsTrue(s->strict),
1483 next_idx_ptr);
1484 case '{':
1485 /* object */
1486 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1487 case '[':
1488 /* array */
1489 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1490 case 'n':
1491 /* null */
1492 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1493 Py_INCREF(Py_None);
1494 *next_idx_ptr = idx + 4;
1495 return Py_None;
1496 }
1497 break;
1498 case 't':
1499 /* true */
1500 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1501 Py_INCREF(Py_True);
1502 *next_idx_ptr = idx + 4;
1503 return Py_True;
1504 }
1505 break;
1506 case 'f':
1507 /* false */
1508 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1509 Py_INCREF(Py_False);
1510 *next_idx_ptr = idx + 5;
1511 return Py_False;
1512 }
1513 break;
1514 case 'N':
1515 /* NaN */
1516 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1517 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1518 }
1519 break;
1520 case 'I':
1521 /* Infinity */
1522 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1523 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1524 }
1525 break;
1526 case '-':
1527 /* -Infinity */
1528 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1529 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1530 }
1531 break;
1532 }
1533 /* Didn't find a string, object, array, or named constant. Look for a number. */
1534 return _match_number_str(s, pystr, idx, next_idx_ptr);
1535 }
1536
1537 static PyObject *
1538 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1539 {
1540 /* Read one JSON term (of any kind) from PyUnicode pystr.
1541 idx is the index of the first character of the term
1542 *next_idx_ptr is a return-by-reference index to the first character after
1543 the number.
1544
1545 Returns a new PyObject representation of the term.
1546 */
1547 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1548 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1549 if (idx >= length) {
1550 PyErr_SetNone(PyExc_StopIteration);
1551 return NULL;
1552 }
1553 switch (str[idx]) {
1554 case '"':
1555 /* string */
1556 return scanstring_unicode(pystr, idx + 1,
1557 PyObject_IsTrue(s->strict),
1558 next_idx_ptr);
1559 case '{':
1560 /* object */
1561 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1562 case '[':
1563 /* array */
1564 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1565 case 'n':
1566 /* null */
1567 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1568 Py_INCREF(Py_None);
1569 *next_idx_ptr = idx + 4;
1570 return Py_None;
1571 }
1572 break;
1573 case 't':
1574 /* true */
1575 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1576 Py_INCREF(Py_True);
1577 *next_idx_ptr = idx + 4;
1578 return Py_True;
1579 }
1580 break;
1581 case 'f':
1582 /* false */
1583 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1584 Py_INCREF(Py_False);
1585 *next_idx_ptr = idx + 5;
1586 return Py_False;
1587 }
1588 break;
1589 case 'N':
1590 /* NaN */
1591 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1592 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1593 }
1594 break;
1595 case 'I':
1596 /* Infinity */
1597 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1598 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1599 }
1600 break;
1601 case '-':
1602 /* -Infinity */
1603 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1604 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1605 }
1606 break;
1607 }
1608 /* Didn't find a string, object, array, or named constant. Look for a number. */
1609 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1610 }
1611
1612 static PyObject *
1613 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1614 {
1615 /* Python callable interface to scan_once_{str,unicode} */
1616 PyObject *pystr;
1617 PyObject *rval;
1618 Py_ssize_t idx;
1619 Py_ssize_t next_idx = -1;
1620 static char *kwlist[] = {"string", "idx", NULL};
1621 PyScannerObject *s;
1622 assert(PyScanner_Check(self));
1623 s = (PyScannerObject *)self;
1624 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1625 return NULL;
1626
1627 if (PyString_Check(pystr)) {
1628 rval = scan_once_str(s, pystr, idx, &next_idx);
1629 }
1630 else if (PyUnicode_Check(pystr)) {
1631 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1632 }
1633 else {
1634 PyErr_Format(PyExc_TypeError,
1635 "first argument must be a string, not %.80s",
1636 Py_TYPE(pystr)->tp_name);
1637 return NULL;
1638 }
1639 return _build_rval_index_tuple(rval, next_idx);
1640 }
1641
1642 static PyObject *
1643 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1644 {
1645 PyScannerObject *s;
1646 s = (PyScannerObject *)type->tp_alloc(type, 0);
1647 if (s != NULL) {
1648 s->encoding = NULL;
1649 s->strict = NULL;
1650 s->object_hook = NULL;
1651 s->parse_float = NULL;
1652 s->parse_int = NULL;
1653 s->parse_constant = NULL;
1654 }
1655 return (PyObject *)s;
1656 }
1657
1658 static int
1659 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1660 {
1661 /* Initialize Scanner object */
1662 PyObject *ctx;
1663 static char *kwlist[] = {"context", NULL};
1664 PyScannerObject *s;
1665
1666 assert(PyScanner_Check(self));
1667 s = (PyScannerObject *)self;
1668
1669 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1670 return -1;
1671
1672 /* PyString_AS_STRING is used on encoding */
1673 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1674 if (s->encoding == Py_None) {
1675 Py_DECREF(Py_None);
1676 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1677 }
1678 else if (PyUnicode_Check(s->encoding)) {
1679 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1680 Py_DECREF(s->encoding);
1681 s->encoding = tmp;
1682 }
1683 if (s->encoding == NULL || !PyString_Check(s->encoding))
1684 goto bail;
1685
1686 /* All of these will fail "gracefully" so we don't need to verify them */
1687 s->strict = PyObject_GetAttrString(ctx, "strict");
1688 if (s->strict == NULL)
1689 goto bail;
1690 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1691 if (s->object_hook == NULL)
1692 goto bail;
1693 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1694 if (s->parse_float == NULL)
1695 goto bail;
1696 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1697 if (s->parse_int == NULL)
1698 goto bail;
1699 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1700 if (s->parse_constant == NULL)
1701 goto bail;
1702
1703 return 0;
1704
1705 bail:
1706 Py_CLEAR(s->encoding);
1707 Py_CLEAR(s->strict);
1708 Py_CLEAR(s->object_hook);
1709 Py_CLEAR(s->parse_float);
1710 Py_CLEAR(s->parse_int);
1711 Py_CLEAR(s->parse_constant);
1712 return -1;
1713 }
1714
/* Docstring installed as tp_doc of the Scanner type below. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for simplejson._speedups.Scanner.
 *
 * Instances are callable (tp_call = scanner_call), take part in cyclic
 * garbage collection (Py_TPFLAGS_HAVE_GC together with scanner_traverse and
 * scanner_clear) and expose their configuration through scanner_members.
 *
 * Several slots are left 0 with the intended function named in a comment
 * (tp_getattro, tp_setattro, tp_alloc, tp_free).
 * NOTE(review): presumably these are filled in during module
 * initialisation, which is not visible in this part of the file -- confirm.
 */
static
PyTypeObject PyScannerType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "simplejson._speedups.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1760
1761 static PyObject *
1762 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1763 {
1764 PyEncoderObject *s;
1765 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1766 if (s != NULL) {
1767 s->markers = NULL;
1768 s->defaultfn = NULL;
1769 s->encoder = NULL;
1770 s->indent = NULL;
1771 s->key_separator = NULL;
1772 s->item_separator = NULL;
1773 s->sort_keys = NULL;
1774 s->skipkeys = NULL;
1775 }
1776 return (PyObject *)s;
1777 }
1778
1779 static int
1780 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1781 {
1782 /* initialize Encoder object */
1783 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1784
1785 PyEncoderObject *s;
1786 PyObject *allow_nan;
1787
1788 assert(PyEncoder_Check(self));
1789 s = (PyEncoderObject *)self;
1790
1791 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1792 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))
1793 return -1;
1794
1795 Py_INCREF(s->markers);
1796 Py_INCREF(s->defaultfn);
1797 Py_INCREF(s->encoder);
1798 Py_INCREF(s->indent);
1799 Py_INCREF(s->key_separator);
1800 Py_INCREF(s->item_separator);
1801 Py_INCREF(s->sort_keys);
1802 Py_INCREF(s->skipkeys);
1803 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1804 s->allow_nan = PyObject_IsTrue(allow_nan);
1805 return 0;
1806 }
1807
1808 static PyObject *
1809 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1810 {
1811 /* Python callable interface to encode_listencode_obj */
1812 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1813 PyObject *obj;
1814 PyObject *rval;
1815 Py_ssize_t indent_level;
1816 PyEncoderObject *s;
1817 assert(PyEncoder_Check(self));
1818 s = (PyEncoderObject *)self;
1819 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1820 &obj, _convertPyInt_AsSsize_t, &indent_level))
1821 return NULL;
1822 rval = PyList_New(0);
1823 if (rval == NULL)
1824 return NULL;
1825 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1826 Py_DECREF(rval);
1827 return NULL;
1828 }
1829 return rval;
1830 }
1831
1832 static PyObject *
1833 _encoded_const(PyObject *obj)
1834 {
1835 /* Return the JSON string representation of None, True, False */
1836 if (obj == Py_None) {
1837 static PyObject *s_null = NULL;
1838 if (s_null == NULL) {
1839 s_null = PyString_InternFromString("null");
1840 }
1841 Py_INCREF(s_null);
1842 return s_null;
1843 }
1844 else if (obj == Py_True) {
1845 static PyObject *s_true = NULL;
1846 if (s_true == NULL) {
1847 s_true = PyString_InternFromString("true");
1848 }
1849 Py_INCREF(s_true);
1850 return s_true;
1851 }
1852 else if (obj == Py_False) {
1853 static PyObject *s_false = NULL;
1854 if (s_false == NULL) {
1855 s_false = PyString_InternFromString("false");
1856 }
1857 Py_INCREF(s_false);
1858 return s_false;
1859 }
1860 else {
1861 PyErr_SetString(PyExc_ValueError, "not a const");
1862 return NULL;
1863 }
1864 }
1865
1866 static PyObject *
1867 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1868 {
1869 /* Return the JSON representation of a PyFloat */
1870 double i = PyFloat_AS_DOUBLE(obj);
1871 if (!Py_IS_FINITE(i)) {
1872 if (!s->allow_nan) {
1873 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1874 return NULL;
1875 }
1876 if (i > 0) {
1877 return PyString_FromString("Infinity");
1878 }
1879 else if (i < 0) {
1880 return PyString_FromString("-Infinity");
1881 }
1882 else {
1883 return PyString_FromString("NaN");
1884 }
1885 }
1886 /* Use a better float format here? */
1887 return PyObject_Repr(obj);
1888 }
1889
1890 static PyObject *
1891 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1892 {
1893 /* Return the JSON representation of a string */
1894 if (s->fast_encode)
1895 return py_encode_basestring_ascii(NULL, obj);
1896 else
1897 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1898 }
1899
1900 static int
1901 _steal_list_append(PyObject *lst, PyObject *stolen)
1902 {
1903 /* Append stolen and then decrement its reference count */
1904 int rval = PyList_Append(lst, stolen);
1905 Py_DECREF(stolen);
1906 return rval;
1907 }
1908
1909 static int
1910 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1911 {
1912 /* Encode Python object obj to a JSON term, rval is a PyList */
1913 PyObject *newobj;
1914 int rv;
1915
1916 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1917 PyObject *cstr = _encoded_const(obj);
1918 if (cstr == NULL)
1919 return -1;
1920 return _steal_list_append(rval, cstr);
1921 }
1922 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1923 {
1924 PyObject *encoded = encoder_encode_string(s, obj);
1925 if (encoded == NULL)
1926 return -1;
1927 return _steal_list_append(rval, encoded);
1928 }
1929 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1930 PyObject *encoded = PyObject_Str(obj);
1931 if (encoded == NULL)
1932 return -1;
1933 return _steal_list_append(rval, encoded);
1934 }
1935 else if (PyFloat_Check(obj)) {
1936 PyObject *encoded = encoder_encode_float(s, obj);
1937 if (encoded == NULL)
1938 return -1;
1939 return _steal_list_append(rval, encoded);
1940 }
1941 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1942 return encoder_listencode_list(s, rval, obj, indent_level);
1943 }
1944 else if (PyDict_Check(obj)) {
1945 return encoder_listencode_dict(s, rval, obj, indent_level);
1946 }
1947 else {
1948 PyObject *ident = NULL;
1949 if (s->markers != Py_None) {
1950 int has_key;
1951 ident = PyLong_FromVoidPtr(obj);
1952 if (ident == NULL)
1953 return -1;
1954 has_key = PyDict_Contains(s->markers, ident);
1955 if (has_key) {
1956 if (has_key != -1)
1957 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1958 Py_DECREF(ident);
1959 return -1;
1960 }
1961 if (PyDict_SetItem(s->markers, ident, obj)) {
1962 Py_DECREF(ident);
1963 return -1;
1964 }
1965 }
1966 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1967 if (newobj == NULL) {
1968 Py_XDECREF(ident);
1969 return -1;
1970 }
1971 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
1972 Py_DECREF(newobj);
1973 if (rv) {
1974 Py_XDECREF(ident);
1975 return -1;
1976 }
1977 if (ident != NULL) {
1978 if (PyDict_DelItem(s->markers, ident)) {
1979 Py_XDECREF(ident);
1980 return -1;
1981 }
1982 Py_XDECREF(ident);
1983 }
1984 return rv;
1985 }
1986 }
1987
1988 static int
1989 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1990 {
1991 /* Encode Python dict dct a JSON term, rval is a PyList */
1992 static PyObject *open_dict = NULL;
1993 static PyObject *close_dict = NULL;
1994 static PyObject *empty_dict = NULL;
1995 PyObject *kstr = NULL;
1996 PyObject *ident = NULL;
1997 PyObject *key, *value;
1998 Py_ssize_t pos;
1999 int skipkeys;
2000 Py_ssize_t idx;
2001
2002 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2003 open_dict = PyString_InternFromString("{");
2004 close_dict = PyString_InternFromString("}");
2005 empty_dict = PyString_InternFromString("{}");
2006 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2007 return -1;
2008 }
2009 if (PyDict_Size(dct) == 0)
2010 return PyList_Append(rval, empty_dict);
2011
2012 if (s->markers != Py_None) {
2013 int has_key;
2014 ident = PyLong_FromVoidPtr(dct);
2015 if (ident == NULL)
2016 goto bail;
2017 has_key = PyDict_Contains(s->markers, ident);
2018 if (has_key) {
2019 if (has_key != -1)
2020 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2021 goto bail;
2022 }
2023 if (PyDict_SetItem(s->markers, ident, dct)) {
2024 goto bail;
2025 }
2026 }
2027
2028 if (PyList_Append(rval, open_dict))
2029 goto bail;
2030
2031 if (s->indent != Py_None) {
2032 /* TODO: DOES NOT RUN */
2033 indent_level += 1;
2034 /*
2035 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2036 separator = _item_separator + newline_indent
2037 buf += newline_indent
2038 */
2039 }
2040
2041 /* TODO: C speedup not implemented for sort_keys */
2042
2043 pos = 0;
2044 skipkeys = PyObject_IsTrue(s->skipkeys);
2045 idx = 0;
2046 while (PyDict_Next(dct, &pos, &key, &value)) {
2047 PyObject *encoded;
2048
2049 if (PyString_Check(key) || PyUnicode_Check(key)) {
2050 Py_INCREF(key);
2051 kstr = key;
2052 }
2053 else if (PyFloat_Check(key)) {
2054 kstr = encoder_encode_float(s, key);
2055 if (kstr == NULL)
2056 goto bail;
2057 }
2058 else if (PyInt_Check(key) || PyLong_Check(key)) {
2059 kstr = PyObject_Str(key);
2060 if (kstr == NULL)
2061 goto bail;
2062 }
2063 else if (key == Py_True || key == Py_False || key == Py_None) {
2064 kstr = _encoded_const(key);
2065 if (kstr == NULL)
2066 goto bail;
2067 }
2068 else if (skipkeys) {
2069 continue;
2070 }
2071 else {
2072 /* TODO: include repr of key */
2073 PyErr_SetString(PyExc_ValueError, "keys must be a string");
2074 goto bail;
2075 }
2076
2077 if (idx) {
2078 if (PyList_Append(rval, s->item_separator))
2079 goto bail;
2080 }
2081
2082 encoded = encoder_encode_string(s, kstr);
2083 Py_CLEAR(kstr);
2084 if (encoded == NULL)
2085 goto bail;
2086 if (PyList_Append(rval, encoded)) {
2087 Py_DECREF(encoded);
2088 goto bail;
2089 }
2090 Py_DECREF(encoded);
2091 if (PyList_Append(rval, s->key_separator))
2092 goto bail;
2093 if (encoder_listencode_obj(s, rval, value, indent_level))
2094 goto bail;
2095 idx += 1;
2096 }
2097 if (ident != NULL) {
2098 if (PyDict_DelItem(s->markers, ident))
2099 goto bail;
2100 Py_CLEAR(ident);
2101 }
2102 if (s->indent != Py_None) {
2103 /* TODO: DOES NOT RUN */
2104 indent_level -= 1;
2105 /*
2106 yield '\n' + (' ' * (_indent * _current_indent_level))
2107 */
2108 }
2109 if (PyList_Append(rval, close_dict))
2110 goto bail;
2111 return 0;
2112
2113 bail:
2114 Py_XDECREF(kstr);
2115 Py_XDECREF(ident);
2116 return -1;
2117 }
2118
2119
2120 static int
2121 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2122 {
2123 /* Encode Python list seq to a JSON term, rval is a PyList */
2124 static PyObject *open_array = NULL;
2125 static PyObject *close_array = NULL;
2126 static PyObject *empty_array = NULL;
2127 PyObject *ident = NULL;
2128 PyObject *s_fast = NULL;
2129 Py_ssize_t num_items;
2130 PyObject **seq_items;
2131 Py_ssize_t i;
2132
2133 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2134 open_array = PyString_InternFromString("[");
2135 close_array = PyString_InternFromString("]");
2136 empty_array = PyString_InternFromString("[]");
2137 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2138 return -1;
2139 }
2140 ident = NULL;
2141 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2142 if (s_fast == NULL)
2143 return -1;
2144 num_items = PySequence_Fast_GET_SIZE(s_fast);
2145 if (num_items == 0) {
2146 Py_DECREF(s_fast);
2147 return PyList_Append(rval, empty_array);
2148 }
2149
2150 if (s->markers != Py_None) {
2151 int has_key;
2152 ident = PyLong_FromVoidPtr(seq);
2153 if (ident == NULL)
2154 goto bail;
2155 has_key = PyDict_Contains(s->markers, ident);
2156 if (has_key) {
2157 if (has_key != -1)
2158 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2159 goto bail;
2160 }
2161 if (PyDict_SetItem(s->markers, ident, seq)) {
2162 goto bail;
2163 }
2164 }
2165
2166 seq_items = PySequence_Fast_ITEMS(s_fast);
2167 if (PyList_Append(rval, open_array))
2168 goto bail;
2169 if (s->indent != Py_None) {
2170 /* TODO: DOES NOT RUN */
2171 indent_level += 1;
2172 /*
2173 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2174 separator = _item_separator + newline_indent
2175 buf += newline_indent
2176 */
2177 }
2178 for (i = 0; i < num_items; i++) {
2179 PyObject *obj = seq_items[i];
2180 if (i) {
2181 if (PyList_Append(rval, s->item_separator))
2182 goto bail;
2183 }
2184 if (encoder_listencode_obj(s, rval, obj, indent_level))
2185 goto bail;
2186 }
2187 if (ident != NULL) {
2188 if (PyDict_DelItem(s->markers, ident))
2189 goto bail;
2190 Py_CLEAR(ident);
2191 }
2192 if (s->indent != Py_None) {
2193 /* TODO: DOES NOT RUN */
2194 indent_level -= 1;
2195 /*
2196 yield '\n' + (' ' * (_indent * _current_indent_level))
2197 */
2198 }
2199 if (PyList_Append(rval, close_array))
2200 goto bail;
2201 Py_DECREF(s_fast);
2202 return 0;
2203
2204 bail:
2205 Py_XDECREF(ident);
2206 Py_DECREF(s_fast);
2207 return -1;
2208 }
2209
2210 static void
2211 encoder_dealloc(PyObject *self)
2212 {
2213 /* Deallocate Encoder */
2214 encoder_clear(self);
2215 Py_TYPE(self)->tp_free(self);
2216 }
2217
2218 static int
2219 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2220 {
2221 PyEncoderObject *s;
2222 assert(PyEncoder_Check(self));
2223 s = (PyEncoderObject *)self;
2224 Py_VISIT(s->markers);
2225 Py_VISIT(s->defaultfn);
2226 Py_VISIT(s->encoder);
2227 Py_VISIT(s->indent);
2228 Py_VISIT(s->key_separator);
2229 Py_VISIT(s->item_separator);
2230 Py_VISIT(s->sort_keys);
2231 Py_VISIT(s->skipkeys);
2232 return 0;
2233 }
2234
2235 static int
2236 encoder_clear(PyObject *self)
2237 {
2238 /* Deallocate Encoder */
2239 PyEncoderObject *s;
2240 assert(PyEncoder_Check(self));
2241 s = (PyEncoderObject *)self;
2242 Py_CLEAR(s->markers);
2243 Py_CLEAR(s->defaultfn);
2244 Py_CLEAR(s->encoder);
2245 Py_CLEAR(s->indent);
2246 Py_CLEAR(s->key_separator);
2247 Py_CLEAR(s->item_separator);
2248 Py_CLEAR(s->sort_keys);
2249 Py_CLEAR(s->skipkeys);
2250 return 0;
2251 }
2252
2253 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2254
2255 static
2256 PyTypeObject PyEncoderType = {
2257 PyObject_HEAD_INIT(NULL)
2258 0, /* tp_internal */
2259 "simplejson._speedups.Encoder", /* tp_name */
2260 sizeof(PyEncoderObject), /* tp_basicsize */
2261 0, /* tp_itemsize */
2262 encoder_dealloc, /* tp_dealloc */
2263 0, /* tp_print */
2264 0, /* tp_getattr */
2265 0, /* tp_setattr */
2266 0, /* tp_compare */
2267 0, /* tp_repr */
2268 0, /* tp_as_number */
2269 0, /* tp_as_sequence */
2270 0, /* tp_as_mapping */
2271 0, /* tp_hash */
2272 encoder_call, /* tp_call */
2273 0, /* tp_str */
2274 0, /* tp_getattro */
2275 0, /* tp_setattro */
2276 0, /* tp_as_buffer */
2277 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2278 encoder_doc, /* tp_doc */
2279 encoder_traverse, /* tp_traverse */
2280 encoder_clear, /* tp_clear */
2281 0, /* tp_richcompare */
2282 0, /* tp_weaklistoffset */
2283 0, /* tp_iter */
2284 0, /* tp_iternext */
2285 0, /* tp_methods */
2286 encoder_members, /* tp_members */
2287 0, /* tp_getset */
2288 0, /* tp_base */
2289 0, /* tp_dict */
2290 0, /* tp_descr_get */
2291 0, /* tp_descr_set */
2292 0, /* tp_dictoffset */
2293 encoder_init, /* tp_init */
2294 0, /* tp_alloc */
2295 encoder_new, /* tp_new */
2296 0, /* tp_free */
2297 };
2298
2299 static PyMethodDef speedups_methods[] = {
2300 {"encode_basestring_ascii",
2301 (PyCFunction)py_encode_basestring_ascii,
2302 METH_O,
2303 pydoc_encode_basestring_ascii},
2304 {"scanstring",
2305 (PyCFunction)py_scanstring,
2306 METH_VARARGS,
2307 pydoc_scanstring},
2308 {NULL, NULL, 0, NULL}
2309 };
2310
2311 PyDoc_STRVAR(module_doc,
2312 "simplejson speedups\n");
2313
2314 void
2315 init_speedups(void)
2316 {
2317 PyObject *m;
2318 PyScannerType.tp_new = PyType_GenericNew;
2319 if (PyType_Ready(&PyScannerType) < 0)
2320 return;
2321 PyEncoderType.tp_new = PyType_GenericNew;
2322 if (PyType_Ready(&PyEncoderType) < 0)
2323 return;
2324 m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2325 Py_INCREF((PyObject*)&PyScannerType);
2326 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2327 Py_INCREF((PyObject*)&PyEncoderType);
2328 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2329 }