| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/json/json_parser.h" |
| |
| #include <cmath> |
| #include <utility> |
| |
| #include "base/logging.h" |
| #include "base/macros.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_piece.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/strings/utf_string_conversion_utils.h" |
| #include "base/third_party/icu/icu_utf.h" |
| #include "base/values.h" |
| |
| namespace base { |
| namespace internal { |
| |
| namespace { |
| |
| const int kStackMaxDepth = 100; |
| |
| const int32_t kExtendedASCIIStart = 0x80; |
| |
| // DictionaryHiddenRootValue and ListHiddenRootValue are used in conjunction |
| // with JSONStringValue as an optimization for reducing the number of string |
| // copies. When this optimization is active, the parser uses a hidden root to |
| // keep the original JSON input string live and creates JSONStringValue children |
| // holding StringPiece references to the input string, avoiding about 2/3rds of |
| // string memory copies. The real root value is Swap()ed into the new instance. |
| class DictionaryHiddenRootValue : public DictionaryValue { |
| public: |
| DictionaryHiddenRootValue(std::unique_ptr<std::string> json, |
| std::unique_ptr<Value> root) |
| : json_(std::move(json)) { |
| DCHECK(root->IsType(Value::TYPE_DICTIONARY)); |
| DictionaryValue::Swap(static_cast<DictionaryValue*>(root.get())); |
| } |
| |
| void Swap(DictionaryValue* other) override { |
| DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; |
| |
| // First deep copy to convert JSONStringValue to std::string and swap that |
| // copy with |other|, which contains the new contents of |this|. |
| std::unique_ptr<DictionaryValue> copy(CreateDeepCopy()); |
| copy->Swap(other); |
| |
| // Then erase the contents of the current dictionary and swap in the |
| // new contents, originally from |other|. |
| Clear(); |
| json_.reset(); |
| DictionaryValue::Swap(copy.get()); |
| } |
| |
| // Not overriding DictionaryValue::Remove because it just calls through to |
| // the method below. |
| |
| bool RemoveWithoutPathExpansion(const std::string& key, |
| std::unique_ptr<Value>* out) override { |
| // If the caller won't take ownership of the removed value, just call up. |
| if (!out) |
| return DictionaryValue::RemoveWithoutPathExpansion(key, out); |
| |
| DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; |
| |
| // Otherwise, remove the value while its still "owned" by this and copy it |
| // to convert any JSONStringValues to std::string. |
| std::unique_ptr<Value> out_owned; |
| if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) |
| return false; |
| |
| *out = out_owned->CreateDeepCopy(); |
| |
| return true; |
| } |
| |
| private: |
| std::unique_ptr<std::string> json_; |
| |
| DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); |
| }; |
| |
| class ListHiddenRootValue : public ListValue { |
| public: |
| ListHiddenRootValue(std::unique_ptr<std::string> json, |
| std::unique_ptr<Value> root) |
| : json_(std::move(json)) { |
| DCHECK(root->IsType(Value::TYPE_LIST)); |
| ListValue::Swap(static_cast<ListValue*>(root.get())); |
| } |
| |
| void Swap(ListValue* other) override { |
| DVLOG(1) << "Swap()ing a ListValue inefficiently."; |
| |
| // First deep copy to convert JSONStringValue to std::string and swap that |
| // copy with |other|, which contains the new contents of |this|. |
| std::unique_ptr<ListValue> copy(CreateDeepCopy()); |
| copy->Swap(other); |
| |
| // Then erase the contents of the current list and swap in the new contents, |
| // originally from |other|. |
| Clear(); |
| json_.reset(); |
| ListValue::Swap(copy.get()); |
| } |
| |
| bool Remove(size_t index, std::unique_ptr<Value>* out) override { |
| // If the caller won't take ownership of the removed value, just call up. |
| if (!out) |
| return ListValue::Remove(index, out); |
| |
| DVLOG(1) << "Remove()ing from a ListValue inefficiently."; |
| |
| // Otherwise, remove the value while its still "owned" by this and copy it |
| // to convert any JSONStringValues to std::string. |
| std::unique_ptr<Value> out_owned; |
| if (!ListValue::Remove(index, &out_owned)) |
| return false; |
| |
| *out = out_owned->CreateDeepCopy(); |
| |
| return true; |
| } |
| |
| private: |
| std::unique_ptr<std::string> json_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); |
| }; |
| |
| // A variant on StringValue that uses StringPiece instead of copying the string |
| // into the Value. This can only be stored in a child of hidden root (above), |
| // otherwise the referenced string will not be guaranteed to outlive it. |
| class JSONStringValue : public Value { |
| public: |
| explicit JSONStringValue(StringPiece piece) |
| : Value(TYPE_STRING), string_piece_(piece) {} |
| |
| // Overridden from Value: |
| bool GetAsString(std::string* out_value) const override { |
| string_piece_.CopyToString(out_value); |
| return true; |
| } |
| Value* DeepCopy() const override { |
| return new StringValue(string_piece_.as_string()); |
| } |
| bool Equals(const Value* other) const override { |
| std::string other_string; |
| return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && |
| StringPiece(other_string) == string_piece_; |
| } |
| |
| private: |
| // The location in the original input stream. |
| StringPiece string_piece_; |
| |
| DISALLOW_COPY_AND_ASSIGN(JSONStringValue); |
| }; |
| |
| // Simple class that checks for maximum recursion/"stack overflow." |
| class StackMarker { |
| public: |
| explicit StackMarker(int* depth) : depth_(depth) { |
| ++(*depth_); |
| DCHECK_LE(*depth_, kStackMaxDepth); |
| } |
| ~StackMarker() { |
| --(*depth_); |
| } |
| |
| bool IsTooDeep() const { |
| return *depth_ >= kStackMaxDepth; |
| } |
| |
| private: |
| int* const depth_; |
| |
| DISALLOW_COPY_AND_ASSIGN(StackMarker); |
| }; |
| |
| } // namespace |
| |
| JSONParser::JSONParser(int options) |
| : options_(options), |
| start_pos_(nullptr), |
| pos_(nullptr), |
| end_pos_(nullptr), |
| index_(0), |
| stack_depth_(0), |
| line_number_(0), |
| index_last_line_(0), |
| error_code_(JSONReader::JSON_NO_ERROR), |
| error_line_(0), |
| error_column_(0) { |
| } |
| |
| JSONParser::~JSONParser() { |
| } |
| |
| std::unique_ptr<Value> JSONParser::Parse(StringPiece input) { |
| std::unique_ptr<std::string> input_copy; |
| // If the children of a JSON root can be detached, then hidden roots cannot |
| // be used, so do not bother copying the input because StringPiece will not |
| // be used anywhere. |
| if (!(options_ & JSON_DETACHABLE_CHILDREN)) { |
| input_copy = MakeUnique<std::string>(input.as_string()); |
| start_pos_ = input_copy->data(); |
| } else { |
| start_pos_ = input.data(); |
| } |
| pos_ = start_pos_; |
| end_pos_ = start_pos_ + input.length(); |
| index_ = 0; |
| line_number_ = 1; |
| index_last_line_ = 0; |
| |
| error_code_ = JSONReader::JSON_NO_ERROR; |
| error_line_ = 0; |
| error_column_ = 0; |
| |
| // When the input JSON string starts with a UTF-8 Byte-Order-Mark |
| // <0xEF 0xBB 0xBF>, advance the start position to avoid the |
| // ParseNextToken function mis-treating a Unicode BOM as an invalid |
| // character and returning NULL. |
| if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF && |
| static_cast<uint8_t>(*(pos_ + 1)) == 0xBB && |
| static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) { |
| NextNChars(3); |
| } |
| |
| // Parse the first and any nested tokens. |
| std::unique_ptr<Value> root(ParseNextToken()); |
| if (!root) |
| return nullptr; |
| |
| // Make sure the input stream is at an end. |
| if (GetNextToken() != T_END_OF_INPUT) { |
| if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { |
| ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); |
| return nullptr; |
| } |
| } |
| |
| // Dictionaries and lists can contain JSONStringValues, so wrap them in a |
| // hidden root. |
| if (!(options_ & JSON_DETACHABLE_CHILDREN)) { |
| if (root->IsType(Value::TYPE_DICTIONARY)) { |
| return MakeUnique<DictionaryHiddenRootValue>(std::move(input_copy), |
| std::move(root)); |
| } |
| if (root->IsType(Value::TYPE_LIST)) { |
| return MakeUnique<ListHiddenRootValue>(std::move(input_copy), |
| std::move(root)); |
| } |
| if (root->IsType(Value::TYPE_STRING)) { |
| // A string type could be a JSONStringValue, but because there's no |
| // corresponding HiddenRootValue, the memory will be lost. Deep copy to |
| // preserve it. |
| return root->CreateDeepCopy(); |
| } |
| } |
| |
| // All other values can be returned directly. |
| return root; |
| } |
| |
| JSONReader::JsonParseError JSONParser::error_code() const { |
| return error_code_; |
| } |
| |
| std::string JSONParser::GetErrorMessage() const { |
| return FormatErrorMessage(error_line_, error_column_, |
| JSONReader::ErrorCodeToString(error_code_)); |
| } |
| |
| int JSONParser::error_line() const { |
| return error_line_; |
| } |
| |
| int JSONParser::error_column() const { |
| return error_column_; |
| } |
| |
| // StringBuilder /////////////////////////////////////////////////////////////// |
| |
| JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {} |
| |
| JSONParser::StringBuilder::StringBuilder(const char* pos) |
| : pos_(pos), |
| length_(0), |
| string_(nullptr) { |
| } |
| |
| void JSONParser::StringBuilder::Swap(StringBuilder* other) { |
| std::swap(other->string_, string_); |
| std::swap(other->pos_, pos_); |
| std::swap(other->length_, length_); |
| } |
| |
| JSONParser::StringBuilder::~StringBuilder() { |
| delete string_; |
| } |
| |
| void JSONParser::StringBuilder::Append(const char& c) { |
| DCHECK_GE(c, 0); |
| DCHECK_LT(static_cast<unsigned char>(c), 128); |
| |
| if (string_) |
| string_->push_back(c); |
| else |
| ++length_; |
| } |
| |
| void JSONParser::StringBuilder::AppendString(const std::string& str) { |
| DCHECK(string_); |
| string_->append(str); |
| } |
| |
| void JSONParser::StringBuilder::Convert() { |
| if (string_) |
| return; |
| string_ = new std::string(pos_, length_); |
| } |
| |
| bool JSONParser::StringBuilder::CanBeStringPiece() const { |
| return !string_; |
| } |
| |
| StringPiece JSONParser::StringBuilder::AsStringPiece() { |
| if (string_) |
| return StringPiece(); |
| return StringPiece(pos_, length_); |
| } |
| |
| const std::string& JSONParser::StringBuilder::AsString() { |
| if (!string_) |
| Convert(); |
| return *string_; |
| } |
| |
| // JSONParser private ////////////////////////////////////////////////////////// |
| |
| inline bool JSONParser::CanConsume(int length) { |
| return pos_ + length <= end_pos_; |
| } |
| |
| const char* JSONParser::NextChar() { |
| DCHECK(CanConsume(1)); |
| ++index_; |
| ++pos_; |
| return pos_; |
| } |
| |
| void JSONParser::NextNChars(int n) { |
| DCHECK(CanConsume(n)); |
| index_ += n; |
| pos_ += n; |
| } |
| |
| JSONParser::Token JSONParser::GetNextToken() { |
| EatWhitespaceAndComments(); |
| if (!CanConsume(1)) |
| return T_END_OF_INPUT; |
| |
| switch (*pos_) { |
| case '{': |
| return T_OBJECT_BEGIN; |
| case '}': |
| return T_OBJECT_END; |
| case '[': |
| return T_ARRAY_BEGIN; |
| case ']': |
| return T_ARRAY_END; |
| case '"': |
| return T_STRING; |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| case '-': |
| return T_NUMBER; |
| case 't': |
| return T_BOOL_TRUE; |
| case 'f': |
| return T_BOOL_FALSE; |
| case 'n': |
| return T_NULL; |
| case ',': |
| return T_LIST_SEPARATOR; |
| case ':': |
| return T_OBJECT_PAIR_SEPARATOR; |
| default: |
| return T_INVALID_TOKEN; |
| } |
| } |
| |
| void JSONParser::EatWhitespaceAndComments() { |
| while (pos_ < end_pos_) { |
| switch (*pos_) { |
| case '\r': |
| case '\n': |
| index_last_line_ = index_; |
| // Don't increment line_number_ twice for "\r\n". |
| if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r')) |
| ++line_number_; |
| // Fall through. |
| case ' ': |
| case '\t': |
| NextChar(); |
| break; |
| case '/': |
| if (!EatComment()) |
| return; |
| break; |
| default: |
| return; |
| } |
| } |
| } |
| |
| bool JSONParser::EatComment() { |
| if (*pos_ != '/' || !CanConsume(1)) |
| return false; |
| |
| char next_char = *NextChar(); |
| if (next_char == '/') { |
| // Single line comment, read to newline. |
| while (CanConsume(1)) { |
| next_char = *NextChar(); |
| if (next_char == '\n' || next_char == '\r') |
| return true; |
| } |
| } else if (next_char == '*') { |
| char previous_char = '\0'; |
| // Block comment, read until end marker. |
| while (CanConsume(1)) { |
| next_char = *NextChar(); |
| if (previous_char == '*' && next_char == '/') { |
| // EatWhitespaceAndComments will inspect pos_, which will still be on |
| // the last / of the comment, so advance once more (which may also be |
| // end of input). |
| NextChar(); |
| return true; |
| } |
| previous_char = next_char; |
| } |
| |
| // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. |
| } |
| |
| return false; |
| } |
| |
| Value* JSONParser::ParseNextToken() { |
| return ParseToken(GetNextToken()); |
| } |
| |
| Value* JSONParser::ParseToken(Token token) { |
| switch (token) { |
| case T_OBJECT_BEGIN: |
| return ConsumeDictionary(); |
| case T_ARRAY_BEGIN: |
| return ConsumeList(); |
| case T_STRING: |
| return ConsumeString(); |
| case T_NUMBER: |
| return ConsumeNumber(); |
| case T_BOOL_TRUE: |
| case T_BOOL_FALSE: |
| case T_NULL: |
| return ConsumeLiteral(); |
| default: |
| ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| return nullptr; |
| } |
| } |
| |
| Value* JSONParser::ConsumeDictionary() { |
| if (*pos_ != '{') { |
| ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| return nullptr; |
| } |
| |
| StackMarker depth_check(&stack_depth_); |
| if (depth_check.IsTooDeep()) { |
| ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); |
| return nullptr; |
| } |
| |
| std::unique_ptr<DictionaryValue> dict(new DictionaryValue); |
| |
| NextChar(); |
| Token token = GetNextToken(); |
| while (token != T_OBJECT_END) { |
| if (token != T_STRING) { |
| ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); |
| return nullptr; |
| } |
| |
| // First consume the key. |
| StringBuilder key; |
| if (!ConsumeStringRaw(&key)) { |
| return nullptr; |
| } |
| |
| // Read the separator. |
| NextChar(); |
| token = GetNextToken(); |
| if (token != T_OBJECT_PAIR_SEPARATOR) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| |
| // The next token is the value. Ownership transfers to |dict|. |
| NextChar(); |
| Value* value = ParseNextToken(); |
| if (!value) { |
| // ReportError from deeper level. |
| return nullptr; |
| } |
| |
| dict->SetWithoutPathExpansion(key.AsString(), value); |
| |
| NextChar(); |
| token = GetNextToken(); |
| if (token == T_LIST_SEPARATOR) { |
| NextChar(); |
| token = GetNextToken(); |
| if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { |
| ReportError(JSONReader::JSON_TRAILING_COMMA, 1); |
| return nullptr; |
| } |
| } else if (token != T_OBJECT_END) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); |
| return nullptr; |
| } |
| } |
| |
| return dict.release(); |
| } |
| |
| Value* JSONParser::ConsumeList() { |
| if (*pos_ != '[') { |
| ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| return nullptr; |
| } |
| |
| StackMarker depth_check(&stack_depth_); |
| if (depth_check.IsTooDeep()) { |
| ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); |
| return nullptr; |
| } |
| |
| std::unique_ptr<ListValue> list(new ListValue); |
| |
| NextChar(); |
| Token token = GetNextToken(); |
| while (token != T_ARRAY_END) { |
| Value* item = ParseToken(token); |
| if (!item) { |
| // ReportError from deeper level. |
| return nullptr; |
| } |
| |
| list->Append(item); |
| |
| NextChar(); |
| token = GetNextToken(); |
| if (token == T_LIST_SEPARATOR) { |
| NextChar(); |
| token = GetNextToken(); |
| if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { |
| ReportError(JSONReader::JSON_TRAILING_COMMA, 1); |
| return nullptr; |
| } |
| } else if (token != T_ARRAY_END) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| } |
| |
| return list.release(); |
| } |
| |
| Value* JSONParser::ConsumeString() { |
| StringBuilder string; |
| if (!ConsumeStringRaw(&string)) |
| return nullptr; |
| |
| // Create the Value representation, using a hidden root, if configured |
| // to do so, and if the string can be represented by StringPiece. |
| if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) |
| return new JSONStringValue(string.AsStringPiece()); |
| |
| if (string.CanBeStringPiece()) |
| string.Convert(); |
| return new StringValue(string.AsString()); |
| } |
| |
| bool JSONParser::ConsumeStringRaw(StringBuilder* out) { |
| if (*pos_ != '"') { |
| ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| return false; |
| } |
| |
| // StringBuilder will internally build a StringPiece unless a UTF-16 |
| // conversion occurs, at which point it will perform a copy into a |
| // std::string. |
| StringBuilder string(NextChar()); |
| |
| int length = end_pos_ - start_pos_; |
| int32_t next_char = 0; |
| |
| while (CanConsume(1)) { |
| pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. |
| CBU8_NEXT(start_pos_, index_, length, next_char); |
| if (next_char < 0 || !IsValidCharacter(next_char)) { |
| ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); |
| return false; |
| } |
| |
| if (next_char == '"') { |
| --index_; // Rewind by one because of CBU8_NEXT. |
| out->Swap(&string); |
| return true; |
| } |
| |
| // If this character is not an escape sequence... |
| if (next_char != '\\') { |
| if (next_char < kExtendedASCIIStart) |
| string.Append(static_cast<char>(next_char)); |
| else |
| DecodeUTF8(next_char, &string); |
| } else { |
| // And if it is an escape sequence, the input string will be adjusted |
| // (either by combining the two characters of an encoded escape sequence, |
| // or with a UTF conversion), so using StringPiece isn't possible -- force |
| // a conversion. |
| string.Convert(); |
| |
| if (!CanConsume(1)) { |
| ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
| return false; |
| } |
| |
| switch (*NextChar()) { |
| // Allowed esape sequences: |
| case 'x': { // UTF-8 sequence. |
| // UTF-8 \x escape sequences are not allowed in the spec, but they |
| // are supported here for backwards-compatiblity with the old parser. |
| if (!CanConsume(2)) { |
| ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); |
| return false; |
| } |
| |
| int hex_digit = 0; |
| if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { |
| ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); |
| return false; |
| } |
| NextChar(); |
| |
| if (hex_digit < kExtendedASCIIStart) |
| string.Append(static_cast<char>(hex_digit)); |
| else |
| DecodeUTF8(hex_digit, &string); |
| break; |
| } |
| case 'u': { // UTF-16 sequence. |
| // UTF units are of the form \uXXXX. |
| if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. |
| ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
| return false; |
| } |
| |
| // Skip the 'u'. |
| NextChar(); |
| |
| std::string utf8_units; |
| if (!DecodeUTF16(&utf8_units)) { |
| ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); |
| return false; |
| } |
| |
| string.AppendString(utf8_units); |
| break; |
| } |
| case '"': |
| string.Append('"'); |
| break; |
| case '\\': |
| string.Append('\\'); |
| break; |
| case '/': |
| string.Append('/'); |
| break; |
| case 'b': |
| string.Append('\b'); |
| break; |
| case 'f': |
| string.Append('\f'); |
| break; |
| case 'n': |
| string.Append('\n'); |
| break; |
| case 'r': |
| string.Append('\r'); |
| break; |
| case 't': |
| string.Append('\t'); |
| break; |
| case 'v': // Not listed as valid escape sequence in the RFC. |
| string.Append('\v'); |
| break; |
| // All other escape squences are illegal. |
| default: |
| ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); |
| return false; |
| } |
| } |
| } |
| |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); |
| return false; |
| } |
| |
| // Entry is at the first X in \uXXXX. |
| bool JSONParser::DecodeUTF16(std::string* dest_string) { |
| if (!CanConsume(4)) |
| return false; |
| |
| // This is a 32-bit field because the shift operations in the |
| // conversion process below cause MSVC to error about "data loss." |
| // This only stores UTF-16 code units, though. |
| // Consume the UTF-16 code unit, which may be a high surrogate. |
| int code_unit16_high = 0; |
| if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) |
| return false; |
| |
| // Only add 3, not 4, because at the end of this iteration, the parser has |
| // finished working with the last digit of the UTF sequence, meaning that |
| // the next iteration will advance to the next byte. |
| NextNChars(3); |
| |
| // Used to convert the UTF-16 code units to a code point and then to a UTF-8 |
| // code unit sequence. |
| char code_unit8[8] = { 0 }; |
| size_t offset = 0; |
| |
| // If this is a high surrogate, consume the next code unit to get the |
| // low surrogate. |
| if (CBU16_IS_SURROGATE(code_unit16_high)) { |
| // Make sure this is the high surrogate. If not, it's an encoding |
| // error. |
| if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) |
| return false; |
| |
| // Make sure that the token has more characters to consume the |
| // lower surrogate. |
| if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. |
| return false; |
| if (*NextChar() != '\\' || *NextChar() != 'u') |
| return false; |
| |
| NextChar(); // Read past 'u'. |
| int code_unit16_low = 0; |
| if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) |
| return false; |
| |
| NextNChars(3); |
| |
| if (!CBU16_IS_TRAIL(code_unit16_low)) { |
| return false; |
| } |
| |
| uint32_t code_point = |
| CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low); |
| if (!IsValidCharacter(code_point)) |
| return false; |
| |
| offset = 0; |
| CBU8_APPEND_UNSAFE(code_unit8, offset, code_point); |
| } else { |
| // Not a surrogate. |
| DCHECK(CBU16_IS_SINGLE(code_unit16_high)); |
| if (!IsValidCharacter(code_unit16_high)) |
| return false; |
| |
| CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high); |
| } |
| |
| dest_string->append(code_unit8); |
| return true; |
| } |
| |
| void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) { |
| DCHECK(IsValidCharacter(point)); |
| |
| // Anything outside of the basic ASCII plane will need to be decoded from |
| // int32_t to a multi-byte sequence. |
| if (point < kExtendedASCIIStart) { |
| dest->Append(static_cast<char>(point)); |
| } else { |
| char utf8_units[4] = { 0 }; |
| int offset = 0; |
| CBU8_APPEND_UNSAFE(utf8_units, offset, point); |
| dest->Convert(); |
| // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be |
| // zero terminated at this point. |offset| contains the correct length. |
| dest->AppendString(std::string(utf8_units, offset)); |
| } |
| } |
| |
| Value* JSONParser::ConsumeNumber() { |
| const char* num_start = pos_; |
| const int start_index = index_; |
| int end_index = start_index; |
| |
| if (*pos_ == '-') |
| NextChar(); |
| |
| if (!ReadInt(false)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| end_index = index_; |
| |
| // The optional fraction part. |
| if (*pos_ == '.') { |
| if (!CanConsume(1)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| NextChar(); |
| if (!ReadInt(true)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| end_index = index_; |
| } |
| |
| // Optional exponent part. |
| if (*pos_ == 'e' || *pos_ == 'E') { |
| NextChar(); |
| if (*pos_ == '-' || *pos_ == '+') |
| NextChar(); |
| if (!ReadInt(true)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| end_index = index_; |
| } |
| |
| // ReadInt is greedy because numbers have no easily detectable sentinel, |
| // so save off where the parser should be on exit (see Consume invariant at |
| // the top of the header), then make sure the next token is one which is |
| // valid. |
| const char* exit_pos = pos_ - 1; |
| int exit_index = index_ - 1; |
| |
| switch (GetNextToken()) { |
| case T_OBJECT_END: |
| case T_ARRAY_END: |
| case T_LIST_SEPARATOR: |
| case T_END_OF_INPUT: |
| break; |
| default: |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| |
| pos_ = exit_pos; |
| index_ = exit_index; |
| |
| StringPiece num_string(num_start, end_index - start_index); |
| |
| int num_int; |
| if (StringToInt(num_string, &num_int)) |
| return new FundamentalValue(num_int); |
| |
| double num_double; |
| if (StringToDouble(num_string.as_string(), &num_double) && |
| std::isfinite(num_double)) { |
| return new FundamentalValue(num_double); |
| } |
| |
| return nullptr; |
| } |
| |
| bool JSONParser::ReadInt(bool allow_leading_zeros) { |
| char first = *pos_; |
| int len = 0; |
| |
| char c = first; |
| while (CanConsume(1) && std::isdigit(c)) { |
| c = *NextChar(); |
| ++len; |
| } |
| |
| if (len == 0) |
| return false; |
| |
| if (!allow_leading_zeros && len > 1 && first == '0') |
| return false; |
| |
| return true; |
| } |
| |
| Value* JSONParser::ConsumeLiteral() { |
| switch (*pos_) { |
| case 't': { |
| const char kTrueLiteral[] = "true"; |
| const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); |
| if (!CanConsume(kTrueLen - 1) || |
| !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| NextNChars(kTrueLen - 1); |
| return new FundamentalValue(true); |
| } |
| case 'f': { |
| const char kFalseLiteral[] = "false"; |
| const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); |
| if (!CanConsume(kFalseLen - 1) || |
| !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| NextNChars(kFalseLen - 1); |
| return new FundamentalValue(false); |
| } |
| case 'n': { |
| const char kNullLiteral[] = "null"; |
| const int kNullLen = static_cast<int>(strlen(kNullLiteral)); |
| if (!CanConsume(kNullLen - 1) || |
| !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { |
| ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); |
| return nullptr; |
| } |
| NextNChars(kNullLen - 1); |
| return Value::CreateNullValue().release(); |
| } |
| default: |
| ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); |
| return nullptr; |
| } |
| } |
| |
| // static |
| bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { |
| return strncmp(one, two, len) == 0; |
| } |
| |
| void JSONParser::ReportError(JSONReader::JsonParseError code, |
| int column_adjust) { |
| error_code_ = code; |
| error_line_ = line_number_; |
| error_column_ = index_ - index_last_line_ + column_adjust; |
| } |
| |
| // static |
| std::string JSONParser::FormatErrorMessage(int line, int column, |
| const std::string& description) { |
| if (line || column) { |
| return StringPrintf("Line: %i, column: %i, %s", |
| line, column, description.c_str()); |
| } |
| return description; |
| } |
| |
| } // namespace internal |
| } // namespace base |