#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP #include #include #include #include #include #include #include namespace boost { namespace property_tree { namespace json_parser { namespace detail { template class source { public: typedef typename std::iterator_traits::value_type code_unit; typedef bool (Encoding::*encoding_predicate)(code_unit c) const; explicit source(Encoding& encoding) : encoding(encoding) {} template void set_input(const std::string& filename, const Range& r) { this->filename = filename; cur = r.begin(); end = r.end(); // Note that there is no backtracking, so if e.g. a UTF-8 file // starts with something that initially looks like a BOM but isn't, // there's trouble. // However, no valid JSON file can start with a UTF-8 EF byte. encoding.skip_introduction(cur, end); line = 1; offset = 0; } bool done() const { return cur == end; } void parse_error(const char* msg) { BOOST_PROPERTY_TREE_THROW( json_parser_error(msg, filename, line)); } void next() { if (encoding.is_nl(*cur)) { ++line; offset = 0; } else { ++offset; } ++cur; } template bool have(encoding_predicate p, Action& a) { bool found = cur != end && (encoding.*p)(*cur); if (found) { a(*cur); next(); } return found; } bool have(encoding_predicate p) { DoNothing n; return have(p, n); } template void expect(encoding_predicate p, const char* msg, Action& a) { if (!have(p, a)) { parse_error(msg); } } void expect(encoding_predicate p, const char* msg) { DoNothing n; expect(p, msg, n); } code_unit need_cur(const char* msg) { if (cur == end) { parse_error(msg); } return *cur; } Iterator& raw_cur() { return cur; } Sentinel raw_end() { return end; } private: struct DoNothing { void operator ()(code_unit) const {} }; Encoding& encoding; Iterator cur; Sentinel end; std::string filename; int line; int offset; }; template ::iterator_category> class number_callback_adapter { public: number_callback_adapter(Callbacks& callbacks, Encoding& encoding, Iterator& cur) : callbacks(callbacks), encoding(encoding), first(cur), cur(cur) {} void operator ()(typename Encoding::external_char) {} void finish() const { callbacks.on_number(encoding.to_internal(first, cur)); } private: number_callback_adapter(const number_callback_adapter&); Callbacks& callbacks; Encoding& encoding; Iterator first; Iterator& cur; }; template class number_callback_adapter { public: number_callback_adapter(Callbacks& callbacks, Encoding& encoding, Iterator&) : callbacks(callbacks), encoding(encoding), first(true) {} void operator ()(typename Encoding::external_char c) { if (first) { callbacks.on_begin_number(); first = false; } callbacks.on_digit(encoding.to_internal_trivial(c)); } void finish() const { callbacks.on_end_number(); } private: number_callback_adapter(const number_callback_adapter&); Callbacks& callbacks; Encoding& encoding; bool first; }; template ::iterator_category> class string_callback_adapter { public: string_callback_adapter(Callbacks& callbacks, Encoding& encoding, Iterator& cur) : callbacks(callbacks), encoding(encoding), cur(cur), run_begin(cur) {} void start_run() { run_begin = cur; } void finish_run() { callbacks.on_code_units(encoding.to_internal(run_begin, cur)); } template void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { encoding.skip_codepoint(cur, end, error_fn); } private: string_callback_adapter(const string_callback_adapter&); Callbacks& callbacks; Encoding& encoding; Iterator& cur; Iterator run_begin; }; template class string_callback_adapter { public: string_callback_adapter(Callbacks& callbacks, Encoding& encoding, Iterator& cur) : callbacks(callbacks), encoding(encoding), cur(cur) {} void start_run() {} void finish_run() {} template void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { encoding.transcode_codepoint(cur, end, boost::bind(&Callbacks::on_code_unit, boost::ref(callbacks), _1), error_fn); } private: string_callback_adapter(const string_callback_adapter&); Callbacks& callbacks; Encoding& encoding; Iterator& cur; }; template class parser { typedef detail::number_callback_adapter number_adapter; typedef detail::string_callback_adapter string_adapter; typedef detail::source source; typedef typename source::code_unit code_unit; public: parser(Callbacks& callbacks, Encoding& encoding) : callbacks(callbacks), encoding(encoding), src(encoding) {} template void set_input(const std::string& filename, const Range& r) { src.set_input(filename, r); } void finish() { skip_ws(); if (!src.done()) { parse_error("garbage after data"); } } void parse_value() { if (parse_object()) return; if (parse_array()) return; if (parse_string()) return; if (parse_boolean()) return; if (parse_null()) return; if (parse_number()) return; parse_error("expected value"); } bool parse_null() { skip_ws(); if (!have(&Encoding::is_n)) { return false; } expect(&Encoding::is_u, "expected 'null'"); expect(&Encoding::is_l, "expected 'null'"); expect(&Encoding::is_l, "expected 'null'"); callbacks.on_null(); return true; } bool parse_boolean() { skip_ws(); if (have(&Encoding::is_t)) { expect(&Encoding::is_r, "expected 'true'"); expect(&Encoding::is_u, "expected 'true'"); expect(&Encoding::is_e, "expected 'true'"); callbacks.on_boolean(true); return true; } if (have(&Encoding::is_f)) { expect(&Encoding::is_a, "expected 'false'"); expect(&Encoding::is_l, "expected 'false'"); expect(&Encoding::is_s, "expected 'false'"); expect(&Encoding::is_e, "expected 'false'"); callbacks.on_boolean(false); return true; } return false; } bool parse_number() { skip_ws(); number_adapter adapter(callbacks, encoding, src.raw_cur()); bool started = false; if (have(&Encoding::is_minus, adapter)) { started = true; } if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) { if (started) { parse_error("expected digits after -"); } return false; } parse_frac_part(adapter); parse_exp_part(adapter); adapter.finish(); return true; } bool parse_string() { skip_ws(); if (!have(&Encoding::is_quote)) { return false; } callbacks.on_begin_string(); string_adapter adapter(callbacks, encoding, src.raw_cur()); while (!encoding.is_quote(need_cur("unterminated string"))) { if (encoding.is_backslash(*src.raw_cur())) { adapter.finish_run(); next(); parse_escape(); adapter.start_run(); } else { adapter.process_codepoint(src.raw_end(), boost::bind(&parser::parse_error, this, "invalid code sequence")); } } adapter.finish_run(); callbacks.on_end_string(); next(); return true; } bool parse_array() { skip_ws(); if (!have(&Encoding::is_open_bracket)) { return false; } callbacks.on_begin_array(); skip_ws(); if (have(&Encoding::is_close_bracket)) { callbacks.on_end_array(); return true; } do { parse_value(); skip_ws(); } while (have(&Encoding::is_comma)); expect(&Encoding::is_close_bracket, "expected ']' or ','"); callbacks.on_end_array(); return true; } bool parse_object() { skip_ws(); if (!have(&Encoding::is_open_brace)) { return false; } callbacks.on_begin_object(); skip_ws(); if (have(&Encoding::is_close_brace)) { callbacks.on_end_object(); return true; } do { if (!parse_string()) { parse_error("expected key string"); } skip_ws(); expect(&Encoding::is_colon, "expected ':'"); parse_value(); skip_ws(); } while (have(&Encoding::is_comma)); expect(&Encoding::is_close_brace, "expected '}' or ','"); callbacks.on_end_object(); return true; } private: typedef typename source::encoding_predicate encoding_predicate; void parse_error(const char* msg) { src.parse_error(msg); } void next() { src.next(); } template bool have(encoding_predicate p, Action& a) { return src.have(p, a); } bool have(encoding_predicate p) { return src.have(p); } template void expect(encoding_predicate p, const char* msg, Action& a) { src.expect(p, msg, a); } void expect(encoding_predicate p, const char* msg) { src.expect(p, msg); } code_unit need_cur(const char* msg) { return src.need_cur(msg); } void skip_ws() { while (have(&Encoding::is_ws)) { } } bool parse_int_part(number_adapter& action) { if (!have(&Encoding::is_digit0, action)) { return false; } parse_digits(action); return true; } void parse_frac_part(number_adapter& action) { if (!have(&Encoding::is_dot, action)) { return; } expect(&Encoding::is_digit, "need at least one digit after '.'", action); parse_digits(action); } void parse_exp_part(number_adapter& action) { if (!have(&Encoding::is_eE, action)) { return; } have(&Encoding::is_plusminus, action); expect(&Encoding::is_digit, "need at least one digit in exponent", action); parse_digits(action); } void parse_digits(number_adapter& action) { while (have(&Encoding::is_digit, action)) { } } void parse_escape() { if (have(&Encoding::is_quote)) { feed(0x22); } else if (have(&Encoding::is_backslash)) { feed(0x5c); } else if (have(&Encoding::is_slash)) { feed(0x2f); } else if (have(&Encoding::is_b)) { feed(0x08); // backspace } else if (have(&Encoding::is_f)) { feed(0x0c); // formfeed } else if (have(&Encoding::is_n)) { feed(0x0a); // line feed } else if (have(&Encoding::is_r)) { feed(0x0d); // carriage return } else if (have(&Encoding::is_t)) { feed(0x09); // horizontal tab } else if (have(&Encoding::is_u)) { parse_codepoint_ref(); } else { parse_error("invalid escape sequence"); } } unsigned parse_hex_quad() { unsigned codepoint = 0; for (int i = 0; i < 4; ++i) { int value = encoding.decode_hexdigit( need_cur("invalid escape sequence")); if (value < 0) { parse_error("invalid escape sequence"); } codepoint *= 16; codepoint += value; next(); } return codepoint; } static bool is_surrogate_high(unsigned codepoint) { return (codepoint & 0xfc00) == 0xd800; } static bool is_surrogate_low(unsigned codepoint) { return (codepoint & 0xfc00) == 0xdc00; } static unsigned combine_surrogates(unsigned high, unsigned low) { return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff)); } void parse_codepoint_ref() { unsigned codepoint = parse_hex_quad(); if (is_surrogate_low(codepoint)) { parse_error("invalid codepoint, stray low surrogate"); } if (is_surrogate_high(codepoint)) { expect(&Encoding::is_backslash, "invalid codepoint, stray high surrogate"); expect(&Encoding::is_u, "expected codepoint reference after high surrogate"); int low = parse_hex_quad(); if (!is_surrogate_low(low)) { parse_error("expected low surrogate after high surrogate"); } codepoint = combine_surrogates(codepoint, low); } feed(codepoint); } void feed(unsigned codepoint) { encoding.feed_codepoint(codepoint, boost::bind(&Callbacks::on_code_unit, boost::ref(callbacks), _1)); } Callbacks& callbacks; Encoding& encoding; source src; }; }}}} #endif