sajson
High-Performance JSON Parser
sajson.h
1 /*
2  * Copyright (c) 2012-2017 Chad Austin
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include <assert.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <math.h>
32 #include <limits.h>
33 #include <algorithm>
34 #include <cstdio>
35 #include <limits>
36 
37 #ifndef SAJSON_NO_STD_STRING
38 #include <string> // for convenient access to error messages and string values.
39 #endif
40 
// Compiler abstraction macros.
//   SAJSON_LIKELY(x) / SAJSON_UNLIKELY(x) - branch hints; usable anywhere a
//       boolean expression is expected.
//   SAJSON_ALWAYS_INLINE                  - force-inline attribute/keyword.
//   SAJSON_UNREACHABLE()                  - marks a point control cannot reach.
//   SAJSON_snprintf                       - the platform's snprintf spelling.
//
// The non-GCC variants of LIKELY/UNLIKELY wrap their argument in parentheses
// so that an expression such as `!SAJSON_LIKELY(a == b)` keeps its meaning
// after expansion.
#if defined(__GNUC__) || defined(__clang__)
#define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1)
#define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define SAJSON_ALWAYS_INLINE __attribute__((always_inline))
#define SAJSON_UNREACHABLE() __builtin_unreachable()
#define SAJSON_snprintf snprintf
#elif defined(_MSC_VER)
#define SAJSON_LIKELY(x) (x)
#define SAJSON_UNLIKELY(x) (x)
#define SAJSON_ALWAYS_INLINE __forceinline
#define SAJSON_UNREACHABLE() __assume(0)
// _MSC_VER <= 1800 selects the underscore-prefixed _snprintf instead.
#if (_MSC_VER <= 1800)
#define SAJSON_snprintf _snprintf
#else
#define SAJSON_snprintf snprintf
#endif
#else
#define SAJSON_LIKELY(x) (x)
#define SAJSON_UNLIKELY(x) (x)
#define SAJSON_ALWAYS_INLINE inline
#define SAJSON_UNREACHABLE() assert(!"unreachable")
#define SAJSON_snprintf snprintf
#endif
64 
68 namespace sajson {
69 
// Tag identifying the kind of a JSON value. The numeric values run from
// 0 through 7 in declaration order, so a tag always fits in three bits.
enum type : uint8_t {
    TYPE_INTEGER = 0,
    TYPE_DOUBLE,  // 1
    TYPE_NULL,    // 2
    TYPE_FALSE,   // 3
    TYPE_TRUE,    // 4
    TYPE_STRING,  // 5
    TYPE_ARRAY,   // 6
    TYPE_OBJECT,  // 7
};
81 
82  namespace internal {
// Number of low-order bits of a packed element word that carry the type tag.
static const size_t TYPE_BITS = 3;
// Mask selecting those low-order tag bits.
static const size_t TYPE_MASK = (size_t(1) << TYPE_BITS) - 1;
// Largest value that still fits in the bits above the tag.
static const size_t VALUE_MASK = ~size_t(0) >> TYPE_BITS;

// Sentinel equal to the largest representable element value.
static const size_t ROOT_MARKER = VALUE_MASK;
88 
89  inline type get_element_type(size_t s) {
90  return static_cast<type>(s & TYPE_MASK);
91  }
92 
93  inline size_t get_element_value(size_t s) {
94  return s >> TYPE_BITS;
95  }
96 
97  inline size_t make_element(type t, size_t value) {
98  //assert((value & ~VALUE_MASK) == 0);
99  //value &= VALUE_MASK;
100  return static_cast<size_t>(t) | (value << TYPE_BITS);
101  }
102 
// Holder for the parser's character-classification table.
//
// Making the holder a template lets the static array be defined in a header:
// the One Definition Rule permits the definition of a class template's static
// data member to appear in every translation unit.
// This trick courtesy of Rich Geldreich's Purple JSON parser.
template<typename unused=void>
struct globals_struct {
    static const unsigned char parse_flags[256];
};
typedef globals_struct<> globals;

// Per-byte flag table, indexed by the byte's unsigned value.
// bit 0 (1)    - set if: plain ASCII string character
// bit 1 (2)    - set if: whitespace
// bit 4 (0x10) - set if: 0-9 e E .
//
// The element type is spelled `unsigned char` so that this definition matches
// the in-class declaration exactly instead of relying on uint8_t being an
// alias for unsigned char.
template<typename unused>
const unsigned char globals_struct<unused>::parse_flags[256] = {
 // 0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
    0,   0,   0,   0,   0,   0,   0,   0,     0,   2,   2,   0,   0,   2,   0,   0, // 0
    0,   0,   0,   0,   0,   0,   0,   0,     0,   0,   0,   0,   0,   0,   0,   0, // 1
    3,   1,   0,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   0x11,1, // 2
    0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,  0x11,0x11,1,   1,   1,   1,   1,   1, // 3
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   0,   1,   1,   1, // 5
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 7

    // 128-255
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0
};
132 
133  inline bool is_plain_string_character(char c) {
134  //return c >= 0x20 && c <= 0x7f && c != 0x22 && c != 0x5c;
135  return (globals::parse_flags[static_cast<unsigned char>(c)] & 1) != 0;
136  }
137 
138  inline bool is_whitespace(char c) {
139  //return c == '\r' || c == '\n' || c == '\t' || c == ' ';
140  return (globals::parse_flags[static_cast<unsigned char>(c)] & 2) != 0;
141  }
142 
144  public:
146  : memory(0)
147  {}
148 
149  explicit allocated_buffer(size_t length) {
150  // throws std::bad_alloc upon allocation failure
151  void* buffer = operator new(sizeof(size_t) + length);
152  memory = static_cast<layout*>(buffer);
153  memory->refcount = 1;
154  }
155 
157  : memory(that.memory)
158  {
159  incref();
160  }
161 
163  : memory(that.memory)
164  {
165  that.memory = 0;
166  }
167 
168  ~allocated_buffer() {
169  decref();
170  }
171 
172  allocated_buffer& operator=(const allocated_buffer& that) {
173  if (this != &that) {
174  decref();
175  memory = that.memory;
176  incref();
177  }
178  return *this;
179  }
180 
181  allocated_buffer& operator=(allocated_buffer&& that) {
182  if (this != &that) {
183  decref();
184  memory = that.memory;
185  that.memory = 0;
186  }
187  return *this;
188  }
189 
190  char* get_data() const {
191  return memory ? memory->data : 0;
192  }
193 
194  private:
195  void incref() const {
196  if (memory) {
197  ++(memory->refcount);
198  }
199  }
200 
201  void decref() const {
202  if (memory && --(memory->refcount) == 0) {
203  operator delete(memory);
204  }
205  }
206 
// In-memory shape of an allocated block: the reference count, immediately
// followed by the data bytes. `data` is a flexible array member, so its
// real extent is whatever was allocated beyond the header.
struct layout {
    size_t refcount;
    char data[];
};
211 
212  layout* memory;
213  };
214  }
215 
// Non-owning view of a run of characters: a pointer plus a length.
// The view never copies or frees the text; the caller keeps the underlying
// storage alive for as long as the view is used.
class string {
public:
    // Creates a view of `length` characters starting at `text`.
    string(const char* text, size_t length)
        : text(text)
        , _length(length)
    {}

    // There is no meaningful empty view; deleted explicitly (rather than
    // declared private and left undefined) so misuse is a clear compile error.
    string() = delete;

    // Pointer to the first character of the view.
    const char* data() const {
        return text;
    }

    // Number of characters in the view.
    size_t length() const {
        return _length;
    }

#ifndef SAJSON_NO_STD_STRING
    // Copies the viewed characters into a new std::string.
    std::string as_string() const {
        return std::string(text, text + _length);
    }
#endif

private:
    const char* const text;
    const size_t _length;
};
245 
248  class literal : public string {
249  public:
250  explicit literal(const char* text)
251  : string(text, strlen(text))
252  {}
253  };
254 
258  public:
261  : length_(0)
262  , data(0)
263  , buffer()
264  {}
265 
270  mutable_string_view(size_t length, char* data)
271  : length_(length)
272  , data(data)
273  , buffer()
274  {}
275 
279  : length_(s.length())
280  , buffer(length_)
281  {
282  data = buffer.get_data();
283  memcpy(data, s.data(), length_);
284  }
285 
288  mutable_string_view(const string& s)
289  : length_(s.length())
290  , buffer(length_)
291  {
292  data = buffer.get_data();
293  memcpy(data, s.data(), length_);
294  }
295 
300  : length_(that.length_)
301  , data(that.data)
302  , buffer(that.buffer)
303  {}
304 
307  : length_(that.length_)
308  , data(that.data)
309  , buffer(std::move(that.buffer))
310  {
311  that.length_ = 0;
312  that.data = 0;
313  }
314 
315  mutable_string_view& operator=(mutable_string_view&& that) {
316  if (this != &that) {
317  length_ = that.length_;
318  data = that.data;
319  buffer = std::move(that.buffer);
320  that.length_ = 0;
321  that.data = 0;
322  }
323  return *this;
324  }
325 
326  mutable_string_view& operator=(const mutable_string_view& that) {
327  if (this != &that) {
328  length_ = that.length_;
329  data = that.data;
330  buffer = that.buffer;
331  }
332  return *this;
333  }
334 
335  size_t length() const {
336  return length_;
337  }
338 
339  char* get_data() const {
340  return data;
341  }
342 
343  private:
344  size_t length_;
345  char* data;
346  internal::allocated_buffer buffer; // may not be allocated
347  };
348 
349  namespace internal {
351  size_t key_start;
352  size_t key_end;
353  size_t value;
354  };
355 
357  object_key_comparator(const char* object_data)
358  : data(object_data)
359  {}
360 
361  bool operator()(const object_key_record& lhs, const string& rhs) const {
362  const size_t lhs_length = lhs.key_end - lhs.key_start;
363  const size_t rhs_length = rhs.length();
364  if (lhs_length < rhs_length) {
365  return true;
366  } else if (lhs_length > rhs_length) {
367  return false;
368  }
369  return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0;
370  }
371 
372  bool operator()(const string& lhs, const object_key_record& rhs) const {
373  return !(*this)(rhs, lhs);
374  }
375 
376  bool operator()(
377  const object_key_record& lhs,
378  const object_key_record& rhs
379  ) {
380  const size_t lhs_length = lhs.key_end - lhs.key_start;
381  const size_t rhs_length = rhs.key_end - rhs.key_start;
382  if (lhs_length < rhs_length) {
383  return true;
384  } else if (lhs_length > rhs_length) {
385  return false;
386  }
387  return memcmp(
388  data + lhs.key_start,
389  data + rhs.key_start,
390  lhs_length
391  ) < 0;
392  }
393 
394  const char* data;
395  };
396  }
397 
// Helpers for keeping an int inside a size_t-sized structure word.
namespace integer_storage {
    // An int always takes exactly one word.
    enum { word_length = 1 };

    // Reads back the int whose bytes were written at `location` by store().
    inline int load(const size_t* location) {
        int result;
        memcpy(&result, location, sizeof(int));
        return result;
    }

    // Writes the bytes of `value` at `location`.
    inline void store(size_t* location, int value) {
        // NOTE: Most modern compilers optimize away this constant-size
        // memcpy into a single instruction. If any don't, and treat
        // punning through a union as legal, they can be special-cased.
        static_assert(
            sizeof(int) <= sizeof(size_t),
            "size_t must not be smaller than int");
        memcpy(location, &value, sizeof(int));
    }
}
419 
// Helpers for keeping a double inside size_t-sized structure words.
namespace double_storage {
    // Number of words a double occupies.
    enum { word_length = sizeof(double) / sizeof(size_t) };

    // Reads back the double whose bytes were written at `location` by store().
    inline double load(const size_t* location) {
        double result;
        memcpy(&result, location, sizeof(result));
        return result;
    }

    // Writes the bytes of `value` starting at `location`.
    inline void store(size_t* location, double value) {
        // NOTE: Most modern compilers optimize away this constant-size
        // memcpy into a single instruction. If any don't, and treat
        // punning through a union as legal, they can be special-cased.
        memcpy(location, &value, sizeof(value));
    }
}
438 
445  class value {
446  public:
448  type get_type() const {
449  return value_type;
450  }
451 
454  size_t get_length() const {
455  assert_type_2(TYPE_ARRAY, TYPE_OBJECT);
456  return payload[0];
457  }
458 
462  value get_array_element(size_t index) const {
463  using namespace internal;
464  assert_type(TYPE_ARRAY);
465  size_t element = payload[1 + index];
466  return value(get_element_type(element), payload + get_element_value(element), text);
467  }
468 
472  string get_object_key(size_t index) const {
473  assert_type(TYPE_OBJECT);
474  const size_t* s = payload + 1 + index * 3;
475  return string(text + s[0], s[1] - s[0]);
476  }
477 
480  value get_object_value(size_t index) const {
481  using namespace internal;
482  assert_type(TYPE_OBJECT);
483  size_t element = payload[3 + index * 3];
484  return value(get_element_type(element), payload + get_element_value(element), text);
485  }
486 
490  value get_value_of_key(const string& key) const {
491  assert_type(TYPE_OBJECT);
492  size_t i = find_object_key(key);
493  if (i < get_length()) {
494  return get_object_value(i);
495  } else {
496  return value(TYPE_NULL, 0, 0);
497  }
498  }
499 
504  size_t find_object_key(const string& key) const {
505  using namespace internal;
506  assert_type(TYPE_OBJECT);
507  const object_key_record* start = reinterpret_cast<const object_key_record*>(payload + 1);
508  const object_key_record* end = start + get_length();
509  const object_key_record* i = std::lower_bound(start, end, key, object_key_comparator(text));
510  return (i != end
511  && (i->key_end - i->key_start) == key.length()
512  && memcmp(key.data(), text + i->key_start, key.length()) == 0)? i - start : get_length();
513  }
514 
517  int get_integer_value() const {
518  assert_type(TYPE_INTEGER);
519  return integer_storage::load(payload);
520  }
521 
524  double get_double_value() const {
525  assert_type(TYPE_DOUBLE);
526  return double_storage::load(payload);
527  }
528 
531  double get_number_value() const {
532  assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
533  if (get_type() == TYPE_INTEGER) {
534  return get_integer_value();
535  } else {
536  return get_double_value();
537  }
538  }
539 
547  bool get_int53_value(int64_t* out) const {
548  // Make sure the output variable is always defined to avoid any
549  // possible situation like
550  // https://gist.github.com/chadaustin/2c249cb850619ddec05b23ca42cf7a18
551  *out = 0;
552 
553  assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
554  if (get_type() == TYPE_INTEGER) {
555  *out = get_integer_value();
556  return true;
557  } else if (get_type() == TYPE_DOUBLE) {
558  double v = get_double_value();
559  if (v < -(1LL << 53) || v > (1LL << 53)) {
560  return false;
561  }
562  int64_t as_int = static_cast<int64_t>(v);
563  if (as_int != v) {
564  return false;
565  }
566  *out = as_int;
567  return true;
568  } else {
569  return false;
570  }
571  }
572 
575  size_t get_string_length() const {
576  assert_type(TYPE_STRING);
577  return payload[1] - payload[0];
578  }
579 
586  const char* as_cstring() const {
587  assert_type(TYPE_STRING);
588  return text + payload[0];
589  }
590 
591 #ifndef SAJSON_NO_STD_STRING
592  std::string as_string() const {
595  assert_type(TYPE_STRING);
596  return std::string(text + payload[0], text + payload[1]);
597  }
598 #endif
599 
601  const size_t* _internal_get_payload() const {
602  return payload;
603  }
605 
606  private:
607  explicit value(type value_type, const size_t* payload, const char* text)
608  : value_type(value_type)
609  , payload(payload)
610  , text(text)
611  {}
612 
613  void assert_type(type expected) const {
614  assert(expected == get_type());
615  }
616 
617  void assert_type_2(type e1, type e2) const {
618  assert(e1 == get_type() || e2 == get_type());
619  }
620 
621  void assert_in_bounds(size_t i) const {
622  assert(i < get_length());
623  }
624 
625  const type value_type;
626  const size_t* const payload;
627  const char* const text;
628 
629  friend class document;
630  };
631 
// Parse result codes. ERROR_SUCCESS is zero; every other code names one
// specific failure. The human-readable text for each code is produced by
// internal::get_error_text().
enum error {
    ERROR_SUCCESS = 0,
    ERROR_OUT_OF_MEMORY = 1,
    ERROR_UNEXPECTED_END = 2,
    ERROR_MISSING_ROOT_ELEMENT = 3,
    ERROR_BAD_ROOT = 4,
    ERROR_EXPECTED_COMMA = 5,
    ERROR_MISSING_OBJECT_KEY = 6,
    ERROR_EXPECTED_COLON = 7,
    ERROR_EXPECTED_END_OF_INPUT = 8,
    ERROR_UNEXPECTED_COMMA = 9,
    ERROR_EXPECTED_VALUE = 10,
    ERROR_EXPECTED_NULL = 11,
    ERROR_EXPECTED_FALSE = 12,
    ERROR_EXPECTED_TRUE = 13,
    ERROR_INVALID_NUMBER = 14,
    ERROR_MISSING_EXPONENT = 15,
    ERROR_ILLEGAL_CODEPOINT = 16,
    ERROR_INVALID_UNICODE_ESCAPE = 17,
    ERROR_UNEXPECTED_END_OF_UTF16 = 18,
    ERROR_EXPECTED_U = 19,
    ERROR_INVALID_UTF16_TRAIL_SURROGATE = 20,
    ERROR_UNKNOWN_ESCAPE = 21,
    ERROR_INVALID_UTF8 = 22,
};
658 
659  namespace internal {
// Move-only owner of a size_t array allocated with new[].
// The array (if any) is released with delete[] on destruction.
class ownership {
public:
    ownership() = delete;
    ownership(const ownership&) = delete;
    void operator=(const ownership&) = delete;

    // Takes ownership of `p`, which may be null.
    explicit ownership(size_t* p)
        : p(p)
    {
    }

    // Steals the pointer from `other`, leaving `other` owning nothing.
    ownership(ownership&& other)
        : p(other.p)
    {
        other.p = 0;
    }

    ~ownership() {
        delete[] p;
    }

    // True while a non-null pointer is owned.
    bool is_valid() const {
        return p != 0;
    }

private:
    size_t* p;
};
686 
687  inline const char* get_error_text(error error_code) {
688  switch (error_code) {
689  case ERROR_SUCCESS: return "no error";
690  case ERROR_OUT_OF_MEMORY: return "out of memory";
691  case ERROR_UNEXPECTED_END: return "unexpected end of input";
692  case ERROR_MISSING_ROOT_ELEMENT: return "missing root element";
693  case ERROR_BAD_ROOT: return "document root must be object or array";
694  case ERROR_EXPECTED_COMMA: return "expected ,";
695  case ERROR_MISSING_OBJECT_KEY: return "missing object key";
696  case ERROR_EXPECTED_COLON: return "expected :";
697  case ERROR_EXPECTED_END_OF_INPUT: return "expected end of input";
698  case ERROR_UNEXPECTED_COMMA: return "unexpected comma";
699  case ERROR_EXPECTED_VALUE: return "expected value";
700  case ERROR_EXPECTED_NULL: return "expected 'null'";
701  case ERROR_EXPECTED_FALSE: return "expected 'false'";
702  case ERROR_EXPECTED_TRUE: return "expected 'true'";
703  case ERROR_INVALID_NUMBER: return "invalid number";
704  case ERROR_MISSING_EXPONENT: return "missing exponent";
705  case ERROR_ILLEGAL_CODEPOINT: return "illegal unprintable codepoint in string";
706  case ERROR_INVALID_UNICODE_ESCAPE: return "invalid character in unicode escape";
707  case ERROR_UNEXPECTED_END_OF_UTF16: return "unexpected end of input during UTF-16 surrogate pair";
708  case ERROR_EXPECTED_U: return "expected \\u";
709  case ERROR_INVALID_UTF16_TRAIL_SURROGATE: return "invalid UTF-16 trail surrogate";
710  case ERROR_UNKNOWN_ESCAPE: return "unknown escape";
711  case ERROR_INVALID_UTF8: return "invalid UTF-8";
712  }
713 
714  SAJSON_UNREACHABLE();
715  }
716  }
717 
727  class document {
728  public:
729  document(document&& rhs)
730  : input(rhs.input)
731  , structure(std::move(rhs.structure))
732  , root_type(rhs.root_type)
733  , root(rhs.root)
734  , error_line(rhs.error_line)
735  , error_column(rhs.error_column)
736  , error_code(rhs.error_code)
737  , error_arg(rhs.error_arg)
738  {
739  // Yikes... but strcpy is okay here because formatted_error is
740  // guaranteed to be null-terminated.
741  strcpy(formatted_error_message, rhs.formatted_error_message);
742  // should rhs's fields be zeroed too?
743  }
744 
751  bool is_valid() const {
752  return root_type == TYPE_ARRAY || root_type == TYPE_OBJECT;
753  }
754 
756  value get_root() const {
757  return value(root_type, root, input.get_data());
758  }
759 
761  size_t get_error_line() const {
762  return error_line;
763  }
764 
766  size_t get_error_column() const {
767  return error_column;
768  }
769 
770 #ifndef SAJSON_NO_STD_STRING
771  std::string get_error_message_as_string() const {
773  return formatted_error_message;
774  }
775 #endif
776 
778  const char* get_error_message_as_cstring() const {
779  return formatted_error_message;
780  }
781 
783 
784  // WARNING: Internal function which is subject to change
785  error _internal_get_error_code() const {
786  return error_code;
787  }
788 
789  // WARNING: Internal function which is subject to change
790  int _internal_get_error_argument() const {
791  return error_arg;
792  }
793 
794  // WARNING: Internal function which is subject to change
795  const char* _internal_get_error_text() const {
796  return internal::get_error_text(error_code);
797  }
798 
799  // WARNING: Internal function exposed only for high-performance language bindings.
800  type _internal_get_root_type() const {
801  return root_type;
802  }
803 
804  // WARNING: Internal function exposed only for high-performance language bindings.
805  const size_t* _internal_get_root() const {
806  return root;
807  }
808 
809  // WARNING: Internal function exposed only for high-performance language bindings.
810  const mutable_string_view& _internal_get_input() const {
811  return input;
812  }
813 
815 
816  private:
817  document(const document&) = delete;
818  void operator=(const document&) = delete;
819 
820  explicit document(const mutable_string_view& input, internal::ownership&& structure, type root_type, const size_t* root)
821  : input(input)
822  , structure(std::move(structure))
823  , root_type(root_type)
824  , root(root)
825  , error_line(0)
826  , error_column(0)
827  , error_code(ERROR_SUCCESS)
828  , error_arg(0)
829  {
830  formatted_error_message[0] = 0;
831  }
832 
833  explicit document(const mutable_string_view& input, size_t error_line, size_t error_column, const error error_code, int error_arg)
834  : input(input)
835  , structure(0)
836  , root_type(TYPE_NULL)
837  , root(0)
838  , error_line(error_line)
839  , error_column(error_column)
840  , error_code(error_code)
841  , error_arg(error_arg)
842  {
843  formatted_error_message[ERROR_BUFFER_LENGTH - 1] = 0;
844  int written = has_significant_error_arg()
845  ? SAJSON_snprintf(formatted_error_message, ERROR_BUFFER_LENGTH - 1, "%s: %d", _internal_get_error_text(), error_arg)
846  : SAJSON_snprintf(formatted_error_message, ERROR_BUFFER_LENGTH - 1, "%s", _internal_get_error_text());
847  (void)written;
848  assert(written >= 0 && written < ERROR_BUFFER_LENGTH);
849  }
850 
851  bool has_significant_error_arg() const {
852  return error_code == ERROR_ILLEGAL_CODEPOINT;
853  }
854 
855  mutable_string_view input;
856  internal::ownership structure;
857  const type root_type;
858  const size_t* const root;
859  const size_t error_line;
860  const size_t error_column;
861  const error error_code;
862  const int error_arg;
863 
864  enum { ERROR_BUFFER_LENGTH = 128 };
865  char formatted_error_message[ERROR_BUFFER_LENGTH];
866 
867  template<typename AllocationStrategy, typename StringType>
868  friend document parse(const AllocationStrategy& strategy, const StringType& string);
869  template<typename Allocator>
870  friend class parser;
871  };
872 
877  public:
879 
// Cursor over a parse stack that lives in caller-supplied storage starting
// at `stack_bottom`. This variant never allocates and never checks
// capacity: push() and reserve() always report success.
class stack_head {
public:
    stack_head(stack_head&& other)
        : stack_bottom(other.stack_bottom)
        , stack_top(other.stack_top)
    {
    }

    // Appends one word to the stack. Always returns true.
    bool push(size_t element) {
        *stack_top = element;
        ++stack_top;
        return true;
    }

    // Claims `amount` words at the top of the stack and returns a pointer
    // to the first of them. Always sets *success to true.
    size_t* reserve(size_t amount, bool* success) {
        size_t* const claimed = stack_top;
        stack_top = claimed + amount;
        *success = true;
        return claimed;
    }

    // The compiler does not see the stack_head (stored in a local)
    // and the allocator (stored as a field) have the same stack_bottom
    // values, so it does a bit of redundant work.
    // So there's a microoptimization available here: introduce a type
    // "stack_mark" and make it polymorphic on the allocator.  For
    // single_allocation, it merely needs to be a single pointer.

    // Moves the top of the stack to `new_top` words above the bottom.
    void reset(size_t new_top) {
        stack_top = stack_bottom + new_top;
    }

    // Number of words currently on the stack.
    size_t get_size() {
        return stack_top - stack_bottom;
    }

    // Pointer one past the last pushed word.
    size_t* get_top() {
        return stack_top;
    }

    // Converts a word offset from the bottom into a pointer.
    size_t* get_pointer_from_offset(size_t offset) {
        return stack_bottom + offset;
    }

private:
    stack_head() = delete;
    stack_head(const stack_head&) = delete;
    void operator=(const stack_head&) = delete;

    // Starts an empty stack at `base`.
    explicit stack_head(size_t* base)
        : stack_bottom(base)
        , stack_top(base)
    {
    }

    size_t* const stack_bottom;
    size_t* stack_top;

    friend class single_allocation;
};
937 
938  class allocator {
939  public:
940  allocator() = delete;
941  allocator(const allocator&) = delete;
942  void operator=(const allocator&) = delete;
943 
944  explicit allocator(size_t* buffer, size_t input_size, bool should_deallocate)
945  : structure(buffer)
946  , structure_end(buffer ? buffer + input_size : 0)
947  , write_cursor(structure_end)
948  , should_deallocate(should_deallocate)
949  {}
950 
951  explicit allocator(std::nullptr_t)
952  : structure(0)
953  , structure_end(0)
954  , write_cursor(0)
955  , should_deallocate(false)
956  {}
957 
958  allocator(allocator&& other)
959  : structure(other.structure)
960  , structure_end(other.structure_end)
961  , write_cursor(other.write_cursor)
962  , should_deallocate(other.should_deallocate)
963  {
964  other.structure = 0;
965  other.structure_end = 0;
966  other.write_cursor = 0;
967  other.should_deallocate = false;
968  }
969 
970  ~allocator() {
971  if (should_deallocate) {
972  delete[] structure;
973  }
974  }
975 
976  stack_head get_stack_head(bool* success) {
977  *success = true;
978  return stack_head(structure);
979  }
980 
981  size_t get_write_offset() {
982  return structure_end - write_cursor;
983  }
984 
985  size_t* get_write_pointer_of(size_t v) {
986  return structure_end - v;
987  }
988 
989  size_t* reserve(size_t size, bool* success) {
990  *success = true;
991  write_cursor -= size;
992  return write_cursor;
993  }
994 
995  size_t* get_ast_root() {
996  return write_cursor;
997  }
998 
999  internal::ownership transfer_ownership() {
1000  auto p = structure;
1001  structure = 0;
1002  structure_end = 0;
1003  write_cursor = 0;
1004  if (should_deallocate) {
1005  return internal::ownership(p);
1006  } else {
1007  return internal::ownership(0);
1008  }
1009  }
1010 
1011  private:
1012  size_t* structure;
1013  size_t* structure_end;
1014  size_t* write_cursor;
1015  bool should_deallocate;
1016  };
1017 
1019 
1023  : has_existing_buffer(false)
1024  , existing_buffer(0)
1025  , existing_buffer_size(0)
1026  {}
1027 
1032  single_allocation(size_t* existing_buffer, size_t size_in_words)
1033  : has_existing_buffer(true)
1034  , existing_buffer(existing_buffer)
1035  , existing_buffer_size(size_in_words)
1036  {}
1037 
1040  template<size_t N>
1041  explicit single_allocation(size_t (&existing_buffer)[N])
1042  : single_allocation(existing_buffer, N)
1043  {}
1044 
1046 
1047  allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1048  if (has_existing_buffer) {
1049  if (existing_buffer_size < input_document_size_in_bytes) {
1050  *succeeded = false;
1051  return allocator(nullptr);
1052  }
1053  *succeeded = true;
1054  return allocator(existing_buffer, input_document_size_in_bytes, false);
1055  } else {
1056  size_t* buffer = new(std::nothrow) size_t[input_document_size_in_bytes];
1057  if (!buffer) {
1058  *succeeded = false;
1059  return allocator(nullptr);
1060  }
1061  *succeeded = true;
1062  return allocator(buffer, input_document_size_in_bytes, true);
1063  }
1064  }
1065 
1067 
1068  private:
1069  bool has_existing_buffer;
1070  size_t* existing_buffer;
1071  size_t existing_buffer_size;
1072  };
1073 
1078  public:
1080 
1081  class stack_head {
1082  public:
1083  stack_head(stack_head&& other)
1084  : stack_top(other.stack_top)
1085  , stack_bottom(other.stack_bottom)
1086  , stack_limit(other.stack_limit)
1087  {
1088  other.stack_top = 0;
1089  other.stack_bottom = 0;
1090  other.stack_limit = 0;
1091  }
1092 
1093  ~stack_head() {
1094  delete[] stack_bottom;
1095  }
1096 
1097  bool push(size_t element) {
1098  if (can_grow(1)) {
1099  *stack_top++ = element;
1100  return true;
1101  } else {
1102  return false;
1103  }
1104  }
1105 
1106  size_t* reserve(size_t amount, bool* success) {
1107  if (can_grow(amount)) {
1108  size_t* rv = stack_top;
1109  stack_top += amount;
1110  *success = true;
1111  return rv;
1112  } else {
1113  *success = false;
1114  return 0;
1115  }
1116  }
1117 
1118  void reset(size_t new_top) {
1119  stack_top = stack_bottom + new_top;
1120  }
1121 
1122  size_t get_size() {
1123  return stack_top - stack_bottom;
1124  }
1125 
1126  size_t* get_top() {
1127  return stack_top;
1128  }
1129 
1130  size_t* get_pointer_from_offset(size_t offset) {
1131  return stack_bottom + offset;
1132  }
1133 
1134  private:
1135  stack_head(const stack_head&) = delete;
1136  void operator=(const stack_head&) = delete;
1137 
1138  explicit stack_head(size_t initial_capacity, bool* success) {
1139  assert(initial_capacity);
1140  stack_bottom = new(std::nothrow) size_t[initial_capacity];
1141  stack_top = stack_bottom;
1142  if (stack_bottom) {
1143  stack_limit = stack_bottom + initial_capacity;
1144  } else {
1145  stack_limit = 0;
1146  }
1147  *success = !!stack_bottom;
1148  }
1149 
1150  bool can_grow(size_t amount) {
1151  if (SAJSON_LIKELY(amount <= static_cast<size_t>(stack_limit - stack_top))) {
1152  return true;
1153  }
1154 
1155  size_t current_size = stack_top - stack_bottom;
1156  size_t old_capacity = stack_limit - stack_bottom;
1157  size_t new_capacity = old_capacity * 2;
1158  while (new_capacity < amount + current_size) {
1159  new_capacity *= 2;
1160  }
1161  size_t* new_stack = new(std::nothrow) size_t[new_capacity];
1162  if (!new_stack) {
1163  stack_top = 0;
1164  stack_bottom = 0;
1165  stack_limit = 0;
1166  return false;
1167  }
1168 
1169  memcpy(new_stack, stack_bottom, current_size * sizeof(size_t));
1170  delete[] stack_bottom;
1171  stack_top = new_stack + current_size;
1172  stack_bottom = new_stack;
1173  stack_limit = stack_bottom + new_capacity;
1174  return true;
1175  }
1176 
1177  size_t* stack_top; // stack grows up: stack_top >= stack_bottom
1178  size_t* stack_bottom;
1179  size_t* stack_limit;
1180 
1181  friend class dynamic_allocation;
1182  };
1183 
1184  class allocator {
1185  public:
1186  allocator() = delete;
1187  allocator(const allocator&) = delete;
1188  void operator=(const allocator&) = delete;
1189 
1190  explicit allocator(size_t* buffer, size_t current_capacity, size_t initial_stack_capacity)
1191  : ast_buffer_bottom(buffer)
1192  , ast_buffer_top(buffer + current_capacity)
1193  , ast_write_head(ast_buffer_top)
1194  , initial_stack_capacity(initial_stack_capacity)
1195  {}
1196 
1197  explicit allocator(std::nullptr_t)
1198  : ast_buffer_bottom(0)
1199  , ast_buffer_top(0)
1200  , ast_write_head(0)
1201  , initial_stack_capacity(0)
1202  {}
1203 
1204  allocator(allocator&& other)
1205  : ast_buffer_bottom(other.ast_buffer_bottom)
1206  , ast_buffer_top(other.ast_buffer_top)
1207  , ast_write_head(other.ast_write_head)
1208  , initial_stack_capacity(other.initial_stack_capacity)
1209  {
1210  other.ast_buffer_bottom = 0;
1211  other.ast_buffer_top = 0;
1212  other.ast_write_head = 0;
1213  }
1214 
1215  ~allocator() {
1216  delete[] ast_buffer_bottom;
1217  }
1218 
1219  stack_head get_stack_head(bool* success) {
1220  return stack_head(initial_stack_capacity, success);
1221  }
1222 
1223  size_t get_write_offset() {
1224  return ast_buffer_top - ast_write_head;
1225  }
1226 
1227  size_t* get_write_pointer_of(size_t v) {
1228  return ast_buffer_top - v;
1229  }
1230 
1231  size_t* reserve(size_t size, bool* success) {
1232  if (can_grow(size)) {
1233  ast_write_head -= size;
1234  *success = true;
1235  return ast_write_head;
1236  } else {
1237  *success = false;
1238  return 0;
1239  }
1240  }
1241 
1242  size_t* get_ast_root() {
1243  return ast_write_head;
1244  }
1245 
1246  internal::ownership transfer_ownership() {
1247  auto p = ast_buffer_bottom;
1248  ast_buffer_bottom = 0;
1249  ast_buffer_top = 0;
1250  ast_write_head = 0;
1251  return internal::ownership(p);
1252  }
1253 
1254  private:
1255  bool can_grow(size_t amount) {
1256  if (SAJSON_LIKELY(amount <= static_cast<size_t>(ast_write_head - ast_buffer_bottom))) {
1257  return true;
1258  }
1259  size_t current_capacity = ast_buffer_top - ast_buffer_bottom;
1260 
1261  size_t current_size = ast_buffer_top - ast_write_head;
1262  size_t new_capacity = current_capacity * 2;
1263  while (new_capacity < amount + current_size) {
1264  new_capacity *= 2;
1265  }
1266 
1267  size_t* old_buffer = ast_buffer_bottom;
1268  size_t* new_buffer = new(std::nothrow) size_t[new_capacity];
1269  if (!new_buffer) {
1270  ast_buffer_bottom = 0;
1271  ast_buffer_top = 0;
1272  ast_write_head = 0;
1273  return false;
1274  }
1275 
1276  size_t* old_write_head = ast_write_head;
1277  ast_buffer_bottom = new_buffer;
1278  ast_buffer_top = new_buffer + new_capacity;
1279  ast_write_head = ast_buffer_top - current_size;
1280  memcpy(ast_write_head, old_write_head, current_size * sizeof(size_t));
1281  delete[] old_buffer;
1282 
1283  return true;
1284  }
1285 
// The AST occupies [ast_write_head, ast_buffer_top); free space is
// [ast_buffer_bottom, ast_write_head). reserve() moves ast_write_head down.
1286  size_t* ast_buffer_bottom; // base address of the ast buffer - it grows down
1287  size_t* ast_buffer_top;
1288  size_t* ast_write_head;
// Passed to the stack_head constructor by get_stack_head().
1289  size_t initial_stack_capacity;
1290  };
1291 
1293 
1296  dynamic_allocation(size_t initial_ast_capacity = 0, size_t initial_stack_capacity = 0)
1297  : initial_ast_capacity(initial_ast_capacity)
1298  , initial_stack_capacity(initial_stack_capacity)
1299  {}
1300 
1302 
1303  allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1304  size_t capacity = initial_ast_capacity;
1305  if (!capacity) {
1306  // TODO: guess based on input document size
1307  capacity = 1024;
1308  }
1309 
1310  size_t* buffer = new(std::nothrow) size_t[capacity];
1311  if (!buffer) {
1312  *succeeded = false;
1313  return allocator(nullptr);
1314  }
1315 
1316  size_t stack_capacity = initial_stack_capacity;
1317  if (!stack_capacity) {
1318  stack_capacity = 256;
1319  }
1320 
1321  *succeeded = true;
1322  return allocator(buffer, capacity, stack_capacity);
1323  }
1324 
1326 
1327  private:
// Requested starting sizes in words; 0 means make_allocator() picks a default.
1328  size_t initial_ast_capacity;
1329  size_t initial_stack_capacity;
1330  };
1331 
1335  // of memory that can be used.
1337  public:
1339 
1340  class allocator;
1341 
1342  class stack_head {
1343  public:
1344  stack_head(stack_head&& other)
1345  : source_allocator(other.source_allocator)
1346  {
1347  other.source_allocator = 0;
1348  }
1349 
1350  bool push(size_t element) {
1351  if (SAJSON_LIKELY(source_allocator->can_grow(1))) {
1352  *(source_allocator->stack_top)++ = element;
1353  return true;
1354  } else {
1355  return false;
1356  }
1357  }
1358 
1359  size_t* reserve(size_t amount, bool* success) {
1360  if (SAJSON_LIKELY(source_allocator->can_grow(amount))) {
1361  size_t* rv = source_allocator->stack_top;
1362  source_allocator->stack_top += amount;
1363  *success = true;
1364  return rv;
1365  } else {
1366  *success = false;
1367  return 0;
1368  }
1369  }
1370 
1371  void reset(size_t new_top) {
1372  source_allocator->stack_top = source_allocator->structure + new_top;
1373  }
1374 
1375  size_t get_size() {
1376  return source_allocator->stack_top - source_allocator->structure;
1377  }
1378 
1379  size_t* get_top() {
1380  return source_allocator->stack_top;
1381  }
1382 
1383  size_t* get_pointer_from_offset(size_t offset) {
1384  return source_allocator->structure + offset;
1385  }
1386 
1387  private:
1388  stack_head(const stack_head&) = delete;
1389  void operator=(const stack_head&) = delete;
1390 
1391  explicit stack_head(allocator* source_allocator)
1392  : source_allocator(source_allocator)
1393  {}
1394 
1395  allocator* source_allocator;
1396 
1397  friend class bounded_allocation;
1398  };
1399 
1400  class allocator {
1401  public:
1402  allocator() = delete;
1403  allocator(const allocator&) = delete;
1404  void operator=(const allocator&) = delete;
1405 
1406  explicit allocator(size_t* existing_buffer, size_t existing_buffer_size)
1407  : structure(existing_buffer)
1408  , structure_end(existing_buffer + existing_buffer_size)
1409  , write_cursor(structure_end)
1410  , stack_top(structure)
1411  {}
1412 
1413  allocator(allocator&& other)
1414  : structure(other.structure)
1415  , structure_end(other.structure_end)
1416  , write_cursor(other.write_cursor)
1417  , stack_top(other.stack_top)
1418  {
1419  other.structure = 0;
1420  other.structure_end = 0;
1421  other.write_cursor = 0;
1422  other.stack_top = 0;
1423  }
1424 
1425  stack_head get_stack_head(bool* success) {
1426  *success = true;
1427  return stack_head(this);
1428  }
1429 
1430  size_t get_write_offset() {
1431  return structure_end - write_cursor;
1432  }
1433 
1434  size_t* get_write_pointer_of(size_t v) {
1435  return structure_end - v;
1436  }
1437 
1438  size_t* reserve(size_t size, bool* success) {
1439  if (can_grow(size)) {
1440  write_cursor -= size;
1441  *success = true;
1442  return write_cursor;
1443  } else {
1444  *success = false;
1445  return 0;
1446  }
1447  }
1448 
1449  size_t* get_ast_root() {
1450  return write_cursor;
1451  }
1452 
1453  internal::ownership transfer_ownership() {
1454  structure = 0;
1455  structure_end = 0;
1456  write_cursor = 0;
1457  return internal::ownership(0);
1458  }
1459 
1460  private:
1461  bool can_grow(size_t amount) {
1462  // invariant: stack_top <= write_cursor
1463  // thus: write_cursor - stack_top is positive
1464  return static_cast<size_t>(write_cursor - stack_top) >= amount;
1465  }
1466 
1467  size_t* structure;
1468  size_t* structure_end;
1469  size_t* write_cursor;
1470  size_t* stack_top;
1471 
1472  friend class bounded_allocation;
1473  };
1474 
1476 
1480  bounded_allocation(size_t* existing_buffer, size_t size_in_words)
1481  : existing_buffer(existing_buffer)
1482  , existing_buffer_size(size_in_words)
1483  {}
1484 
1487  template<size_t N>
1488  explicit bounded_allocation(size_t (&existing_buffer)[N])
1489  : bounded_allocation(existing_buffer, N)
1490  {}
1491 
1493 
1494  allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1495  *succeeded = true;
1496  return allocator(existing_buffer, existing_buffer_size);
1497  }
1498 
1500 
1501  private:
// Caller-provided buffer and its length in words, as given to the constructor.
1502  size_t* existing_buffer;
1503  size_t existing_buffer_size;
1504  };
1505 
1506  // I thought about putting parser in the internal namespace but I don't
1507  // want to indent it further...
1509  template<typename Allocator>
1510  class parser {
1511  public:
1512  parser(const mutable_string_view& msv, Allocator&& allocator)
1513  : input(msv)
1514  , input_end(input.get_data() + input.length())
1515  , allocator(std::move(allocator))
1516  , root_type(TYPE_NULL)
1517  , error_line(0)
1518  , error_column(0)
1519  {}
1520 
1521  document get_document() {
1522  if (parse()) {
1523  size_t* ast_root = allocator.get_ast_root();
1524  return document(input, allocator.transfer_ownership(), root_type, ast_root);
1525  } else {
1526  return document(input, error_line, error_column, error_code, error_arg);
1527  }
1528  }
1529 
1530  private:
// Empty tag returned by the error helpers. It converts to `false` and to a
// null `char*`, so `return make_error(...)` works in functions returning
// either type.
struct error_result {
    operator bool() const { return false; }
    operator char*() const { return nullptr; }
};
1539 
1540  bool at_eof(const char* p) {
1541  return p == input_end;
1542  }
1543 
1544  char* skip_whitespace(char* p) {
1545  // There is an opportunity to make better use of superscalar
1546  // hardware here* but if someone cares about JSON parsing
1547  // performance the first thing they do is minify, so prefer
1548  // to optimize for code size here.
1549  // * https://github.com/chadaustin/Web-Benchmarks/blob/master/json/third-party/pjson/pjson.h#L1873
1550  for (;;) {
1551  if (SAJSON_UNLIKELY(p == input_end)) {
1552  return 0;
1553  } else if (internal::is_whitespace(*p)) {
1554  ++p;
1555  } else {
1556  return p;
1557  }
1558  }
1559  }
1560 
1561  error_result oom(char* p) {
1562  return make_error(p, ERROR_OUT_OF_MEMORY);
1563  }
1564 
1565  error_result unexpected_end() {
1566  return make_error(0, ERROR_UNEXPECTED_END);
1567  }
1568 
1569  error_result unexpected_end(char* p) {
1570  return make_error(p, ERROR_UNEXPECTED_END);
1571  }
1572 
1573  error_result make_error(char* p, error code, int arg = 0) {
1574  if (!p) {
1575  p = input_end;
1576  }
1577 
1578  error_line = 1;
1579  error_column = 1;
1580 
1581  char* c = input.get_data();
1582  while (c < p) {
1583  if (*c == '\r') {
1584  if (c + 1 < p && c[1] == '\n') {
1585  ++error_line;
1586  error_column = 1;
1587  ++c;
1588  } else {
1589  ++error_line;
1590  error_column = 1;
1591  }
1592  } else if (*c == '\n') {
1593  ++error_line;
1594  error_column = 1;
1595  } else {
1596  // TODO: count UTF-8 characters
1597  ++error_column;
1598  }
1599  ++c;
1600  }
1601 
1602  error_code = code;
1603  error_arg = arg;
1604  return error_result();
1605  }
1606 
// Parses the whole input as a goto-driven state machine.
//
// Returns true once a complete root array or object, followed only by
// whitespace, has been consumed (root_type is set on that path). On any
// failure an error has been recorded through make_error()/oom()/
// unexpected_end() and the returned error_result converts to false.
//
// Parse stack layout: every open structure starts with one marker word.
// For the root it is (own type, ROOT_MARKER); for a nested structure it is
// (type of the enclosing structure, enclosing base offset). The marker is
// followed by the words of the children parsed so far: each value
// contributes one word (type, AST write offset) and each object key
// contributes the two words filled in by parse_string.
1607  bool parse() {
1608  using namespace internal;
1609 
1610  // p points to the character currently being parsed
1611  char* p = input.get_data();
1612 
1613  bool success;
1614  auto stack = allocator.get_stack_head(&success);
1615  if (SAJSON_UNLIKELY(!success)) {
1616  return oom(p);
1617  }
1618 
1619  p = skip_whitespace(p);
1620  if (SAJSON_UNLIKELY(!p)) {
1621  return make_error(p, ERROR_MISSING_ROOT_ELEMENT);
1622  }
1623 
1624  // current_base is an offset to the first element of the current structure (object or array)
1625  size_t current_base = stack.get_size();
1626  type current_structure_type;
// Only an array or an object is accepted as the root value.
1627  if (*p == '[') {
1628  current_structure_type = TYPE_ARRAY;
1629  bool s = stack.push(make_element(current_structure_type, ROOT_MARKER));
1630  if (SAJSON_UNLIKELY(!s)) {
1631  return oom(p);
1632  }
1633  goto array_close_or_element;
1634  } else if (*p == '{') {
1635  current_structure_type = TYPE_OBJECT;
1636  bool s = stack.push(make_element(current_structure_type, ROOT_MARKER));
1637  if (SAJSON_UNLIKELY(!s)) {
1638  return oom(p);
1639  }
1640  goto object_close_or_element;
1641  } else {
1642  return make_error(p, ERROR_BAD_ROOT);
1643  }
1644 
1645  // BEGIN STATE MACHINE
1646 
1647  size_t pop_element; // used as an argument into the `pop` routine
1648 
// The body of this `if (0)` is never entered from the top; its states are
// reached only through the gotos above and the gotos inside it.
1649  if (0) { // purely for structure
1650 
1651  // ASSUMES: byte at p SHOULD be skipped
1652  array_close_or_element:
1653  p = skip_whitespace(p + 1);
1654  if (SAJSON_UNLIKELY(!p)) {
1655  return unexpected_end();
1656  }
1657  if (*p == ']') {
1658  goto pop_array;
1659  } else {
1660  goto next_element;
1661  }
1662  SAJSON_UNREACHABLE();
1663 
1664  // ASSUMES: byte at p SHOULD be skipped
1665  object_close_or_element:
1666  p = skip_whitespace(p + 1);
1667  if (SAJSON_UNLIKELY(!p)) {
1668  return unexpected_end();
1669  }
1670  if (*p == '}') {
1671  goto pop_object;
1672  } else {
1673  goto object_key;
1674  }
1675  SAJSON_UNREACHABLE();
1676 
1677  // ASSUMES: byte at p SHOULD NOT be skipped
1678  structure_close_or_comma:
1679  p = skip_whitespace(p);
1680  if (SAJSON_UNLIKELY(!p)) {
1681  return unexpected_end();
1682  }
1683 
1684  if (current_structure_type == TYPE_ARRAY) {
1685  if (*p == ']') {
1686  goto pop_array;
1687  } else {
1688  if (SAJSON_UNLIKELY(*p != ',')) {
1689  return make_error(p, ERROR_EXPECTED_COMMA);
1690  }
1691  ++p;
1692  goto next_element;
1693  }
1694  } else {
1695  assert(current_structure_type == TYPE_OBJECT);
1696  if (*p == '}') {
1697  goto pop_object;
1698  } else {
1699  if (SAJSON_UNLIKELY(*p != ',')) {
1700  return make_error(p, ERROR_EXPECTED_COMMA);
1701  }
1702  ++p;
1703  goto object_key;
1704  }
1705  }
1706  SAJSON_UNREACHABLE();
1707 
1708  // ASSUMES: *p == '}'
1709  pop_object: {
1710  ++p;
1711  size_t* base_ptr = stack.get_pointer_from_offset(current_base);
// Save the marker word; `pop` decodes the parent type and base from it.
1712  pop_element = *base_ptr;
1713  if (SAJSON_UNLIKELY(!install_object(base_ptr + 1, stack.get_top()))) {
1714  return oom(p);
1715  }
1716  goto pop;
1717  }
1718 
1719  // ASSUMES: *p == ']'
1720  pop_array: {
1721  ++p;
1722  size_t* base_ptr = stack.get_pointer_from_offset(current_base);
1723  pop_element = *base_ptr;
1724  if (SAJSON_UNLIKELY(!install_array(base_ptr + 1, stack.get_top()))) {
1725  return oom(p);
1726  }
1727  goto pop;
1728  }
1729 
1730  // ASSUMES: byte at p SHOULD NOT be skipped
1731  object_key: {
1732  p = skip_whitespace(p);
1733  if (SAJSON_UNLIKELY(!p)) {
1734  return unexpected_end();
1735  }
1736  if (SAJSON_UNLIKELY(*p != '"')) {
1737  return make_error(p, ERROR_MISSING_OBJECT_KEY);
1738  }
1739  bool success;
// The key's two tag words go on the parse stack, not into the AST.
1740  size_t* out = stack.reserve(2, &success);
1741  if (SAJSON_UNLIKELY(!success)) {
1742  return oom(p);
1743  }
1744  p = parse_string(p, out);
1745  if (SAJSON_UNLIKELY(!p)) {
// parse_string has already recorded the error.
1746  return false;
1747  }
1748  p = skip_whitespace(p);
1749  if (SAJSON_UNLIKELY(!p || *p != ':')) {
1750  return make_error(p, ERROR_EXPECTED_COLON);
1751  }
1752  ++p;
1753  goto next_element;
1754  }
1755 
1756  // ASSUMES: byte at p SHOULD NOT be skipped
1757  next_element:
1758  p = skip_whitespace(p);
1759  if (SAJSON_UNLIKELY(!p)) {
1760  return unexpected_end();
1761  }
1762 
1763  type value_type_result;
// Dispatch on the first byte of the value. Scalar cases `break` out of the
// switch to push the value's stack word; '[' and '{' jump to a new state.
1764  switch (*p) {
1765  case 0:
1766  return unexpected_end(p);
1767  case 'n':
1768  p = parse_null(p);
1769  if (!p) {
1770  return false;
1771  }
1772  value_type_result = TYPE_NULL;
1773  break;
1774  case 'f':
1775  p = parse_false(p);
1776  if (!p) {
1777  return false;
1778  }
1779  value_type_result = TYPE_FALSE;
1780  break;
1781  case 't':
1782  p = parse_true(p);
1783  if (!p) {
1784  return false;
1785  }
1786  value_type_result = TYPE_TRUE;
1787  break;
1788  case '0':
1789  case '1':
1790  case '2':
1791  case '3':
1792  case '4':
1793  case '5':
1794  case '6':
1795  case '7':
1796  case '8':
1797  case '9':
1798  case '-': {
1799  auto result = parse_number(p);
1800  p = result.first;
1801  if (!p) {
1802  return false;
1803  }
1804  value_type_result = result.second;
1805  break;
1806  }
1807  case '"': {
1808  bool success;
// Unlike object keys, a string value's two tag words go into the AST.
1809  size_t* string_tag = allocator.reserve(2, &success);
1810  if (SAJSON_UNLIKELY(!success)) {
1811  return oom(p);
1812  }
1813  p = parse_string(p, string_tag);
1814  if (!p) {
1815  return false;
1816  }
1817  value_type_result = TYPE_STRING;
1818  break;
1819  }
1820 
1821  case '[': {
1822  size_t previous_base = current_base;
1823  current_base = stack.get_size();
// current_structure_type still names the enclosing structure here; it is
// stored in the marker word so `pop` can restore it.
1824  bool s = stack.push(make_element(current_structure_type, previous_base));
1825  if (SAJSON_UNLIKELY(!s)) {
1826  return oom(p);
1827  }
1828  current_structure_type = TYPE_ARRAY;
1829  goto array_close_or_element;
1830  }
1831  case '{': {
1832  size_t previous_base = current_base;
1833  current_base = stack.get_size();
1834  bool s = stack.push(make_element(current_structure_type, previous_base));
1835  if (SAJSON_UNLIKELY(!s)) {
1836  return oom(p);
1837  }
1838  current_structure_type = TYPE_OBJECT;
1839  goto object_close_or_element;
1840  }
// `pop` is a goto target placed inside the switch so that its `break`
// leaves the switch and falls into the stack push below: a closed nested
// structure is recorded in its parent like any other value.
1841  pop: {
1842  size_t parent = get_element_value(pop_element);
1843  if (parent == ROOT_MARKER) {
1844  root_type = current_structure_type;
1845  p = skip_whitespace(p);
// A non-null p means something other than whitespace follows the root.
1846  if (SAJSON_UNLIKELY(p)) {
1847  return make_error(p, ERROR_EXPECTED_END_OF_INPUT);
1848  }
1849  return true;
1850  }
1851  stack.reset(current_base);
1852  current_base = parent;
1853  value_type_result = current_structure_type;
1854  current_structure_type = get_element_type(pop_element);
1855  break;
1856  }
1857 
1858  case ',':
1859  return make_error(p, ERROR_UNEXPECTED_COMMA);
1860  default:
1861  return make_error(p, ERROR_EXPECTED_VALUE);
1862  }
1863 
// Record the value just parsed: its type plus the AST write offset at
// which its payload (if any) was stored.
1864  bool s = stack.push(make_element(
1865  value_type_result,
1866  allocator.get_write_offset()));
1867  if (SAJSON_UNLIKELY(!s)) {
1868  return oom(p);
1869  }
1870 
1871  goto structure_close_or_comma;
1872  }
1873 
1874  SAJSON_UNREACHABLE();
1875  }
1876 
1877  bool has_remaining_characters(char* p, ptrdiff_t remaining) {
1878  return input_end - p >= remaining;
1879  }
1880 
1881  char* parse_null(char* p) {
1882  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
1883  make_error(p, ERROR_UNEXPECTED_END);
1884  return 0;
1885  }
1886  char p1 = p[1];
1887  char p2 = p[2];
1888  char p3 = p[3];
1889  if (SAJSON_UNLIKELY(p1 != 'u' || p2 != 'l' || p3 != 'l')) {
1890  make_error(p, ERROR_EXPECTED_NULL);
1891  return 0;
1892  }
1893  return p + 4;
1894  }
1895 
1896  char* parse_false(char* p) {
1897  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 5))) {
1898  return make_error(p, ERROR_UNEXPECTED_END);
1899  }
1900  char p1 = p[1];
1901  char p2 = p[2];
1902  char p3 = p[3];
1903  char p4 = p[4];
1904  if (SAJSON_UNLIKELY(p1 != 'a' || p2 != 'l' || p3 != 's' || p4 != 'e')) {
1905  return make_error(p, ERROR_EXPECTED_FALSE);
1906  }
1907  return p + 5;
1908  }
1909 
1910  char* parse_true(char* p) {
1911  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
1912  return make_error(p, ERROR_UNEXPECTED_END);
1913  }
1914  char p1 = p[1];
1915  char p2 = p[2];
1916  char p3 = p[3];
1917  if (SAJSON_UNLIKELY(p1 != 'r' || p2 != 'u' || p3 != 'e')) {
1918  return make_error(p, ERROR_EXPECTED_TRUE);
1919  }
1920  return p + 4;
1921  }
1922 
// Returns 10 raised to `exponent` from a precomputed table.
// Exponents above 308 yield +infinity and exponents below -323 yield 0.0;
// everything in between is looked up directly.
static double pow10(int exponent) {
    const int min_exponent = -323;  // exponent of the first table entry
    const int max_exponent = 308;   // exponent of the last table entry

    if (exponent > max_exponent) {
        return std::numeric_limits<double>::infinity();
    }
    if (exponent < min_exponent) {
        return 0.0;
    }

    static const double constants[] = {
        1e-323,1e-322,1e-321,1e-320,1e-319,1e-318,1e-317,1e-316,1e-315,1e-314,
        1e-313,1e-312,1e-311,1e-310,1e-309,1e-308,1e-307,1e-306,1e-305,1e-304,
        1e-303,1e-302,1e-301,1e-300,1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,
        1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,
        1e-283,1e-282,1e-281,1e-280,1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,
        1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,
        1e-263,1e-262,1e-261,1e-260,1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,
        1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,
        1e-243,1e-242,1e-241,1e-240,1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,
        1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,
        1e-223,1e-222,1e-221,1e-220,1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,
        1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,
        1e-203,1e-202,1e-201,1e-200,1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,
        1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,
        1e-183,1e-182,1e-181,1e-180,1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,
        1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,
        1e-163,1e-162,1e-161,1e-160,1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,
        1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,
        1e-143,1e-142,1e-141,1e-140,1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,
        1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,
        1e-123,1e-122,1e-121,1e-120,1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,
        1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,
        1e-103,1e-102,1e-101,1e-100,1e-99,1e-98,1e-97,1e-96,1e-95,1e-94,1e-93,
        1e-92,1e-91,1e-90,1e-89,1e-88,1e-87,1e-86,1e-85,1e-84,1e-83,1e-82,1e-81,
        1e-80,1e-79,1e-78,1e-77,1e-76,1e-75,1e-74,1e-73,1e-72,1e-71,1e-70,1e-69,
        1e-68,1e-67,1e-66,1e-65,1e-64,1e-63,1e-62,1e-61,1e-60,1e-59,1e-58,1e-57,
        1e-56,1e-55,1e-54,1e-53,1e-52,1e-51,1e-50,1e-49,1e-48,1e-47,1e-46,1e-45,
        1e-44,1e-43,1e-42,1e-41,1e-40,1e-39,1e-38,1e-37,1e-36,1e-35,1e-34,1e-33,
        1e-32,1e-31,1e-30,1e-29,1e-28,1e-27,1e-26,1e-25,1e-24,1e-23,1e-22,1e-21,
        1e-20,1e-19,1e-18,1e-17,1e-16,1e-15,1e-14,1e-13,1e-12,1e-11,1e-10,1e-9,
        1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,
        1e8,1e9,1e10,1e11,1e12,1e13,1e14,1e15,1e16,1e17,1e18,1e19,1e20,1e21,
        1e22,1e23,1e24,1e25,1e26,1e27,1e28,1e29,1e30,1e31,1e32,1e33,1e34,1e35,
        1e36,1e37,1e38,1e39,1e40,1e41,1e42,1e43,1e44,1e45,1e46,1e47,1e48,1e49,
        1e50,1e51,1e52,1e53,1e54,1e55,1e56,1e57,1e58,1e59,1e60,1e61,1e62,1e63,
        1e64,1e65,1e66,1e67,1e68,1e69,1e70,1e71,1e72,1e73,1e74,1e75,1e76,1e77,
        1e78,1e79,1e80,1e81,1e82,1e83,1e84,1e85,1e86,1e87,1e88,1e89,1e90,1e91,
        1e92,1e93,1e94,1e95,1e96,1e97,1e98,1e99,1e100,1e101,1e102,1e103,1e104,
        1e105,1e106,1e107,1e108,1e109,1e110,1e111,1e112,1e113,1e114,1e115,1e116,
        1e117,1e118,1e119,1e120,1e121,1e122,1e123,1e124,1e125,1e126,1e127,1e128,
        1e129,1e130,1e131,1e132,1e133,1e134,1e135,1e136,1e137,1e138,1e139,1e140,
        1e141,1e142,1e143,1e144,1e145,1e146,1e147,1e148,1e149,1e150,1e151,1e152,
        1e153,1e154,1e155,1e156,1e157,1e158,1e159,1e160,1e161,1e162,1e163,1e164,
        1e165,1e166,1e167,1e168,1e169,1e170,1e171,1e172,1e173,1e174,1e175,1e176,
        1e177,1e178,1e179,1e180,1e181,1e182,1e183,1e184,1e185,1e186,1e187,1e188,
        1e189,1e190,1e191,1e192,1e193,1e194,1e195,1e196,1e197,1e198,1e199,1e200,
        1e201,1e202,1e203,1e204,1e205,1e206,1e207,1e208,1e209,1e210,1e211,1e212,
        1e213,1e214,1e215,1e216,1e217,1e218,1e219,1e220,1e221,1e222,1e223,1e224,
        1e225,1e226,1e227,1e228,1e229,1e230,1e231,1e232,1e233,1e234,1e235,1e236,
        1e237,1e238,1e239,1e240,1e241,1e242,1e243,1e244,1e245,1e246,1e247,1e248,
        1e249,1e250,1e251,1e252,1e253,1e254,1e255,1e256,1e257,1e258,1e259,1e260,
        1e261,1e262,1e263,1e264,1e265,1e266,1e267,1e268,1e269,1e270,1e271,1e272,
        1e273,1e274,1e275,1e276,1e277,1e278,1e279,1e280,1e281,1e282,1e283,1e284,
        1e285,1e286,1e287,1e288,1e289,1e290,1e291,1e292,1e293,1e294,1e295,1e296,
        1e297,1e298,1e299,1e300,1e301,1e302,1e303,1e304,1e305,1e306,1e307,1e308
    };
    return constants[exponent - min_exponent];
}
1988 
1989  std::pair<char*, type> parse_number(char* p) {
1990  bool negative = false;
1991  if ('-' == *p) {
1992  ++p;
1993  negative = true;
1994 
1995  if (SAJSON_UNLIKELY(at_eof(p))) {
1996  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
1997  }
1998  }
1999 
2000  bool try_double = false;
2001 
2002  int i = 0;
2003  double d = 0.0; // gcc complains that d might be used uninitialized which isn't true. appease the warning anyway.
2004  if (*p == '0') {
2005  ++p;
2006  if (SAJSON_UNLIKELY(at_eof(p))) {
2007  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2008  }
2009  } else {
2010  unsigned char c = *p;
2011  if (c < '0' || c > '9') {
2012  return std::make_pair(make_error(p, ERROR_INVALID_NUMBER), TYPE_NULL);
2013  }
2014 
2015  do {
2016  ++p;
2017  if (SAJSON_UNLIKELY(at_eof(p))) {
2018  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2019  }
2020 
2021  unsigned char digit = c - '0';
2022 
2023  if (SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)) {
2024  // TODO: could split this into two loops
2025  try_double = true;
2026  d = i;
2027  }
2028  if (SAJSON_UNLIKELY(try_double)) {
2029  d = 10.0 * d + digit;
2030  } else {
2031  i = 10 * i + digit;
2032  }
2033 
2034  c = *p;
2035  } while (c >= '0' && c <= '9');
2036  }
2037 
2038  int exponent = 0;
2039 
2040  if ('.' == *p) {
2041  if (!try_double) {
2042  try_double = true;
2043  d = i;
2044  }
2045  ++p;
2046  if (SAJSON_UNLIKELY(at_eof(p))) {
2047  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2048  }
2049  char c = *p;
2050  if (c < '0' || c > '9') {
2051  return std::make_pair(make_error(p, ERROR_INVALID_NUMBER), TYPE_NULL);
2052  }
2053 
2054  do {
2055  ++p;
2056  if (SAJSON_UNLIKELY(at_eof(p))) {
2057  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2058  }
2059  d = d * 10 + (c - '0');
2060  --exponent;
2061 
2062  c = *p;
2063  } while (c >= '0' && c <= '9');
2064  }
2065 
2066  char e = *p;
2067  if ('e' == e || 'E' == e) {
2068  if (!try_double) {
2069  try_double = true;
2070  d = i;
2071  }
2072  ++p;
2073  if (SAJSON_UNLIKELY(at_eof(p))) {
2074  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2075  }
2076 
2077  bool negativeExponent = false;
2078  if ('-' == *p) {
2079  negativeExponent = true;
2080  ++p;
2081  if (SAJSON_UNLIKELY(at_eof(p))) {
2082  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2083  }
2084  } else if ('+' == *p) {
2085  ++p;
2086  if (SAJSON_UNLIKELY(at_eof(p))) {
2087  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2088  }
2089  }
2090 
2091  int exp = 0;
2092 
2093  char c = *p;
2094  if (SAJSON_UNLIKELY(c < '0' || c > '9')) {
2095  return std::make_pair(make_error(p, ERROR_MISSING_EXPONENT), TYPE_NULL);
2096  }
2097  for (;;) {
2098  exp = 10 * exp + (c - '0');
2099 
2100  ++p;
2101  if (SAJSON_UNLIKELY(at_eof(p))) {
2102  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2103  }
2104 
2105  c = *p;
2106  if (c < '0' || c > '9') {
2107  break;
2108  }
2109  }
2110  exponent += (negativeExponent ? -exp : exp);
2111  }
2112 
2113  if (exponent) {
2114  assert(try_double);
2115  d *= pow10(exponent);
2116  }
2117 
2118  if (negative) {
2119  if (try_double) {
2120  d = -d;
2121  } else {
2122  i = -i;
2123  }
2124  }
2125  if (try_double) {
2126  bool success;
2127  size_t* out = allocator.reserve(double_storage::word_length, &success);
2128  if (SAJSON_UNLIKELY(!success)) {
2129  return std::make_pair(oom(p), TYPE_NULL);
2130  }
2131  double_storage::store(out, d);
2132  return std::make_pair(p, TYPE_DOUBLE);
2133  } else {
2134  bool success;
2135  size_t* out = allocator.reserve(integer_storage::word_length, &success);
2136  if (SAJSON_UNLIKELY(!success)) {
2137  return std::make_pair(oom(p), TYPE_NULL);
2138  }
2139  integer_storage::store(out, i);
2140  return std::make_pair(p, TYPE_INTEGER);
2141  }
2142  }
2143 
2144  bool install_array(size_t* array_base, size_t* array_end) {
2145  using namespace sajson::internal;
2146 
2147  const size_t length = array_end - array_base;
2148  bool success;
2149  size_t* const new_base = allocator.reserve(length + 1, &success);
2150  if (SAJSON_UNLIKELY(!success)) {
2151  return false;
2152  }
2153  size_t* out = new_base + length + 1;
2154  size_t* const structure_end = allocator.get_write_pointer_of(0);
2155 
2156  while (array_end > array_base) {
2157  size_t element = *--array_end;
2158  type element_type = get_element_type(element);
2159  size_t element_value = get_element_value(element);
2160  size_t* element_ptr = structure_end - element_value;
2161  *--out = make_element(element_type, element_ptr - new_base);
2162  }
2163  *--out = length;
2164  return true;
2165  }
2166 
2167  bool install_object(size_t* object_base, size_t* object_end) {
2168  using namespace internal;
2169 
2170  assert((object_end - object_base) % 3 == 0);
2171  const size_t length_times_3 = object_end - object_base;
2172  std::sort(
2173  reinterpret_cast<object_key_record*>(object_base),
2174  reinterpret_cast<object_key_record*>(object_end),
2175  object_key_comparator(input.get_data()));
2176 
2177  bool success;
2178  size_t* const new_base = allocator.reserve(length_times_3 + 1, &success);
2179  if (SAJSON_UNLIKELY(!success)) {
2180  return false;
2181  }
2182  size_t* out = new_base + length_times_3 + 1;
2183  size_t* const structure_end = allocator.get_write_pointer_of(0);
2184 
2185  while (object_end > object_base) {
2186  size_t element = *--object_end;
2187  type element_type = get_element_type(element);
2188  size_t element_value = get_element_value(element);
2189  size_t* element_ptr = structure_end - element_value;
2190 
2191  *--out = make_element(element_type, element_ptr - new_base);
2192  *--out = *--object_end;
2193  *--out = *--object_end;
2194  }
2195  *--out = length_times_3 / 3;
2196  return true;
2197  }
2198 
// Parses a string whose opening quote is at p.
//
// Fast path: scans forward over "plain" characters (as classified by
// is_plain_string_character). If the first non-plain byte is the closing
// quote, tag[0] receives the offset of the first string byte and tag[1] the
// offset of the closing quote (both relative to the start of the input),
// the quote is overwritten with NUL in place, and the position after it is
// returned.
//
// Otherwise: a byte in [0, 0x20) is reported as ERROR_ILLEGAL_CODEPOINT,
// and anything else is handed to parse_string_slow. Running out of input
// is reported as ERROR_UNEXPECTED_END. Failures return null.
2199  char* parse_string(char* p, size_t* tag) {
2200  using namespace internal;
2201 
2202  ++p; // "
2203  size_t start = p - input.get_data();
2204  char* input_end_local = input_end;
// Unrolled scan: test four bytes per iteration while at least four remain.
2205  while (input_end_local - p >= 4) {
2206  if (!is_plain_string_character(p[0])) { goto found; }
2207  if (!is_plain_string_character(p[1])) { p += 1; goto found; }
2208  if (!is_plain_string_character(p[2])) { p += 2; goto found; }
2209  if (!is_plain_string_character(p[3])) { p += 3; goto found; }
2210  p += 4;
2211  }
// Tail: fewer than four bytes remain, check one byte at a time.
2212  for (;;) {
2213  if (SAJSON_UNLIKELY(p >= input_end_local)) {
2214  return make_error(p, ERROR_UNEXPECTED_END);
2215  }
2216 
2217  if (!is_plain_string_character(*p)) {
2218  break;
2219  }
2220 
2221  ++p;
2222  }
// p now points at the first non-plain byte, which lies inside the input.
2223  found:
2224  if (SAJSON_LIKELY(*p == '"')) {
2225  tag[0] = start;
2226  tag[1] = p - input.get_data();
2227  *p = '\0';
2228  return p + 1;
2229  }
2230 
// The `>= 0` test keeps bytes above 0x7f out of this branch when char is
// signed.
2231  if (*p >= 0 && *p < 0x20) {
2232  return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast<int>(*p));
2233  } else {
2234  // backslash or >0x7f
2235  return parse_string_slow(p, tag, start);
2236  }
2237  }
2238 
2239  char* read_hex(char* p, unsigned& u) {
2240  unsigned v = 0;
2241  int i = 4;
2242  while (i--) {
2243  unsigned char c = *p++;
2244  if (c >= '0' && c <= '9') {
2245  c -= '0';
2246  } else if (c >= 'a' && c <= 'f') {
2247  c = c - 'a' + 10;
2248  } else if (c >= 'A' && c <= 'F') {
2249  c = c - 'A' + 10;
2250  } else {
2251  return make_error(p, ERROR_INVALID_UNICODE_ESCAPE);
2252  }
2253  v = (v << 4) + c;
2254  }
2255 
2256  u = v;
2257  return p;
2258  }
2259 
// Encodes `codepoint` as UTF-8 at `end` and advances `end` past the bytes
// written: 1 byte below 0x80, 2 below 0x800, 3 below 0x10000, otherwise 4.
// Code points of 0x200000 and above are rejected by an assert.
void write_utf8(unsigned codepoint, char*& end) {
    char* out = end;
    if (codepoint < 0x80) {
        out[0] = static_cast<char>(codepoint);
        out += 1;
    } else if (codepoint < 0x800) {
        out[0] = static_cast<char>(0xC0 | (codepoint >> 6));
        out[1] = static_cast<char>(0x80 | (codepoint & 0x3F));
        out += 2;
    } else if (codepoint < 0x10000) {
        out[0] = static_cast<char>(0xE0 | (codepoint >> 12));
        out[1] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        out[2] = static_cast<char>(0x80 | (codepoint & 0x3F));
        out += 3;
    } else {
        assert(codepoint < 0x200000);
        out[0] = static_cast<char>(0xF0 | (codepoint >> 18));
        out[1] = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
        out[2] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        out[3] = static_cast<char>(0x80 | (codepoint & 0x3F));
        out += 4;
    }
    end = out;
}
2278 
// Continues parsing a string from p when it contains escapes or bytes above
// 0x7f. `start` is the offset of the first string byte.
//
// The string is decoded in place: p is the read cursor and `end` the write
// cursor (which never runs ahead of p), so escapes shrink the text. At the
// closing quote, tag[0] = start and tag[1] = offset of the decoded end, a
// NUL is written at `end`, and the position after the quote is returned.
//
// Any error is recorded and reported by returning null: end of input,
// control characters below 0x20, unknown or malformed escapes, a UTF-16
// lead surrogate without a valid trail surrogate, and UTF-8 sequences whose
// continuation bytes are missing or invalid.
2279  char* parse_string_slow(char* p, size_t* tag, size_t start) {
2280  char* end = p;
2281  char* input_end_local = input_end;
2282 
2283  for (;;) {
2284  if (SAJSON_UNLIKELY(p >= input_end_local)) {
2285  return make_error(p, ERROR_UNEXPECTED_END);
2286  }
2287 
2288  if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
2289  return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast<int>(*p));
2290  }
2291 
2292  switch (*p) {
2293  case '"':
2294  tag[0] = start;
2295  tag[1] = end - input.get_data();
2296  *end = '\0';
2297  return p + 1;
2298 
2299  case '\\':
2300  ++p;
2301  if (SAJSON_UNLIKELY(p >= input_end_local)) {
2302  return make_error(p, ERROR_UNEXPECTED_END);
2303  }
2304 
2305  char replacement;
// Single-character escapes share the `replace` tail, which sits inside the
// case list so that each of them can jump to it.
2306  switch (*p) {
2307  case '"': replacement = '"'; goto replace;
2308  case '\\': replacement = '\\'; goto replace;
2309  case '/': replacement = '/'; goto replace;
2310  case 'b': replacement = '\b'; goto replace;
2311  case 'f': replacement = '\f'; goto replace;
2312  case 'n': replacement = '\n'; goto replace;
2313  case 'r': replacement = '\r'; goto replace;
2314  case 't': replacement = '\t'; goto replace;
2315  replace:
2316  *end++ = replacement;
2317  ++p;
2318  break;
2319  case 'u': {
2320  ++p;
2321  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
2322  return make_error(p, ERROR_UNEXPECTED_END);
2323  }
2324  unsigned u = 0; // gcc's complaining that this could be used uninitialized. wrong.
2325  p = read_hex(p, u);
2326  if (!p) {
2327  return 0;
2328  }
// A UTF-16 lead surrogate must be followed by a second \uXXXX escape
// (six bytes) holding the trail surrogate.
2329  if (u >= 0xD800 && u <= 0xDBFF) {
2330  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 6))) {
2331  return make_error(p, ERROR_UNEXPECTED_END_OF_UTF16);
2332  }
2333  char p0 = p[0];
2334  char p1 = p[1];
2335  if (p0 != '\\' || p1 != 'u') {
2336  return make_error(p, ERROR_EXPECTED_U);
2337  }
2338  p += 2;
2339  unsigned v = 0; // gcc's complaining that this could be used uninitialized. wrong.
2340  p = read_hex(p, v);
2341  if (!p) {
2342  return p;
2343  }
2344 
2345  if (v < 0xDC00 || v > 0xDFFF) {
2346  return make_error(p, ERROR_INVALID_UTF16_TRAIL_SURROGATE);
2347  }
// Combine the surrogate pair into one code point at or above 0x10000.
2348  u = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00));
2349  }
2350  write_utf8(u, end);
2351  break;
2352  }
2353  default:
2354  return make_error(p, ERROR_UNKNOWN_ESCAPE);
2355  }
2356  break;
2357 
2358  default:
2359  // validate UTF-8
// The lead byte selects the sequence length; each continuation byte must
// lie in [128, 192). The bytes are copied through unchanged.
2360  unsigned char c0 = p[0];
2361  if (c0 < 128) {
2362  *end++ = *p++;
// NOTE(review): only the upper bound is tested here, so lead bytes
// 0x80-0xBF also take this two-byte branch.
2363  } else if (c0 < 224) {
2364  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 2))) {
2365  return unexpected_end(p);
2366  }
2367  unsigned char c1 = p[1];
2368  if (c1 < 128 || c1 >= 192) {
2369  return make_error(p + 1, ERROR_INVALID_UTF8);
2370  }
2371  end[0] = c0;
2372  end[1] = c1;
2373  end += 2;
2374  p += 2;
2375  } else if (c0 < 240) {
2376  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 3))) {
2377  return unexpected_end(p);
2378  }
2379  unsigned char c1 = p[1];
2380  if (c1 < 128 || c1 >= 192) {
2381  return make_error(p + 1, ERROR_INVALID_UTF8);
2382  }
2383  unsigned char c2 = p[2];
2384  if (c2 < 128 || c2 >= 192) {
2385  return make_error(p + 2, ERROR_INVALID_UTF8);
2386  }
2387  end[0] = c0;
2388  end[1] = c1;
2389  end[2] = c2;
2390  end += 3;
2391  p += 3;
2392  } else if (c0 < 248) {
2393  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
2394  return unexpected_end(p);
2395  }
2396  unsigned char c1 = p[1];
2397  if (c1 < 128 || c1 >= 192) {
2398  return make_error(p + 1, ERROR_INVALID_UTF8);
2399  }
2400  unsigned char c2 = p[2];
2401  if (c2 < 128 || c2 >= 192) {
2402  return make_error(p + 2, ERROR_INVALID_UTF8);
2403  }
2404  unsigned char c3 = p[3];
2405  if (c3 < 128 || c3 >= 192) {
2406  return make_error(p + 3, ERROR_INVALID_UTF8);
2407  }
2408  end[0] = c0;
2409  end[1] = c1;
2410  end[2] = c2;
2411  end[3] = c3;
2412  end += 4;
2413  p += 4;
2414  } else {
2415  return make_error(p, ERROR_INVALID_UTF8);
2416  }
2417  break;
2418  }
2419  }
2420  }
2421 
// The document being parsed; string contents are rewritten in place.
2422  mutable_string_view input;
// One past the last input byte, computed in the constructor.
2423  char* const input_end;
2424  Allocator allocator;
2425 
// Type of the root value; assigned by parse() when the root is closed.
2426  type root_type;
// Error position (1-based) and code; assigned by make_error().
2427  size_t error_line;
2428  size_t error_column;
2429  error error_code;
2430  int error_arg; // optional argument for the error
2431  };
2433 
2445  template<typename AllocationStrategy, typename StringType>
2446  document parse(const AllocationStrategy& strategy, const StringType& string) {
2447  mutable_string_view input(string);
2448 
2449  bool success;
2450  auto allocator = strategy.make_allocator(input.length(), &success);
2451  if (!success) {
2452  return document(input, 1, 1, ERROR_OUT_OF_MEMORY, 0);
2453  }
2454 
2455  return parser<typename AllocationStrategy::allocator>(
2456  input,
2457  std::move(allocator)
2458  ).get_document();
2459  }
2460 }
type
Tag indicating a JSON value's type.
Definition: sajson.h:71
mutable_string_view(size_t length, char *data)
Given a length in bytes and a pointer, constructs a view that does not allocate a copy of the data or...
Definition: sajson.h:270
mutable_string_view(const mutable_string_view &that)
Copies a mutable_string_view.
Definition: sajson.h:299
double get_number_value() const
Returns a numeric value as a double-precision float.
Definition: sajson.h:531
single_allocation(size_t *existing_buffer, size_t size_in_words)
Write the AST into an existing buffer.
Definition: sajson.h:1032
Definition: sajson.h:143
A pointer to a mutable buffer, its size in bytes, and strong ownership of any copied memory...
Definition: sajson.h:257
size_t get_length() const
Returns the length of the object or array.
Definition: sajson.h:454
bool get_int53_value(int64_t *out) const
Returns true and writes to the output argument if the numeric value fits in a 53-bit integer...
Definition: sajson.h:547
single_allocation()
Allocate a single worst-case AST buffer with one word per byte in the input document.
Definition: sajson.h:1022
const char * get_error_message_as_cstring() const
If not is_valid(), returns a null-terminated C string indicating why the parse failed.
Definition: sajson.h:778
Definition: sajson.h:106
Definition: sajson.h:350
size_t get_string_length() const
Returns the length of the string.
Definition: sajson.h:575
bounded_allocation(size_t(&existing_buffer)[N])
Convenience wrapper for bounded_allocation(size_t*, size) that automatically infers the size of the g...
Definition: sajson.h:1488
type get_type() const
Returns the JSON value's type.
Definition: sajson.h:448
A simple type encoding a pointer to some memory and a length (in bytes).
Definition: sajson.h:218
value get_value_of_key(const string &key) const
Given a string key, returns the value with that key or a null value if the key is not found...
Definition: sajson.h:490
Represents the result of a JSON parse: either is_valid() and the document contains a root value or pa...
Definition: sajson.h:727
Allocation policy that allocates one large buffer guaranteed to hold the resulting AST...
Definition: sajson.h:876
Allocation policy that uses dynamically-growing buffers for both the parse stack and the AST...
Definition: sajson.h:1077
string get_object_key(size_t index) const
Returns the nth key of an object.
Definition: sajson.h:472
Definition: sajson.h:82
double get_double_value() const
If a numeric value was parsed as a double, returns it.
Definition: sajson.h:524
mutable_string_view(const literal &s)
Allocates a copy of the given literal string and exposes a mutable view into it.
Definition: sajson.h:278
Definition: sajson.h:660
Represents a JSON value.
Definition: sajson.h:445
mutable_string_view()
Creates an empty, zero-sized view.
Definition: sajson.h:260
single_allocation(size_t(&existing_buffer)[N])
Convenience wrapper for single_allocation(size_t*, size_t) that automatically infers the length of a ...
Definition: sajson.h:1041
sajson Public API
Definition: sajson.h:68
value get_object_value(size_t index) const
Returns the nth value of an object.
Definition: sajson.h:480
dynamic_allocation(size_t initial_ast_capacity=0, size_t initial_stack_capacity=0)
Creates a dynamic_allocation policy with the given initial AST and stack buffer sizes.
Definition: sajson.h:1296
bounded_allocation(size_t *existing_buffer, size_t size_in_words)
Uses an existing buffer to hold the parsed AST, if it fits.
Definition: sajson.h:1480
mutable_string_view(const string &s)
Allocates a copy of the given string and exposes a mutable view into it.
Definition: sajson.h:288
value get_array_element(size_t index) const
Returns the nth element of an array.
Definition: sajson.h:462
size_t get_error_column() const
If not is_valid(), returns the one-based column number where the parse failed.
Definition: sajson.h:766
mutable_string_view(mutable_string_view &&that)
Move constructor - neuters the old mutable_string_view.
Definition: sajson.h:306
int get_integer_value() const
If a numeric value was parsed as a 32-bit integer, returns it.
Definition: sajson.h:517
const char * as_cstring() const
Returns a pointer to the beginning of a string value's data.
Definition: sajson.h:586
size_t get_error_line() const
If not is_valid(), returns the one-based line number where the parse failed.
Definition: sajson.h:761
value get_root() const
If is_valid(), returns the document's root value.
Definition: sajson.h:756
error
Error code indicating why parse failed.
Definition: sajson.h:633
A convenient way to parse JSON from a string literal.
Definition: sajson.h:248
bool is_valid() const
Returns true if the document was parsed successfully.
Definition: sajson.h:751
size_t find_object_key(const string &key) const
Given a string key, returns the index of the associated value if one exists.
Definition: sajson.h:504
document parse(const AllocationStrategy &strategy, const StringType &string)
Parses a string of JSON bytes into a document, given an allocation strategy instance.
Definition: sajson.h:2446
Allocation policy that attempts to fit the parsed AST into an existing memory buffer.
Definition: sajson.h:1336