blob: c8ed0cdc1fd02ef1b6b9231661a2ae4d6d59db58 [file] [log] [blame]
Vitaly Bukacbed2062015-08-17 12:54:05 -07001// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef BASE_JSON_JSON_PARSER_H_
6#define BASE_JSON_JSON_PARSER_H_
7
8#include <string>
9
10#include "base/base_export.h"
11#include "base/basictypes.h"
12#include "base/compiler_specific.h"
Vitaly Buka8750b272015-08-18 18:39:08 -070013#include "base/gtest_prod_util.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070014#include "base/json/json_reader.h"
15#include "base/strings/string_piece.h"
16
// Forward declaration: this header only refers to Value through pointers
// (the Value* return types of the parser methods declared below).
Vitaly Bukacbed2062015-08-17 12:54:05 -070017namespace base {
18class Value;
19}
20
Vitaly Bukacbed2062015-08-17 12:54:05 -070021namespace base {
22namespace internal {
23
// Forward declaration of the test class that JSONParser declares as a friend.
24class JSONParserTest;
25
26// The implementation behind the JSONReader interface. This class is not meant
27// to be used directly; it encapsulates logic that need not be exposed publicly.
28//
29// This parser guarantees O(n) time through the input string. It also optimizes
30// base::StringValue by using StringPiece where possible when returning Value
31// objects by using "hidden roots," discussed in the implementation.
32//
33// Iteration happens on the byte level, with the functions CanConsume and
34// NextChar. The conversion from byte to JSON token happens without advancing
35// the parser in GetNextToken/ParseToken; that is, tokenization operates on
36// the current parser position without advancing.
37//
38// Built on top of these are a family of Consume functions that iterate
39// internally. Invariant: on entry of a Consume function, the parser is wound
40// to the first byte of a valid JSON token. On exit, it is on the last byte
41// of a token, such that the next iteration of the parser will be at the byte
42// immediately following the token, which would likely be the first byte of the
43// next token.
44class BASE_EXPORT_PRIVATE JSONParser {
45 public:
 // Creates a parser configured by |options|, the base::JSONParserOptions that
 // control parsing (see |options_|).
46 explicit JSONParser(int options);
47 ~JSONParser();
48
49 // Parses the input string according to the set options and returns the
50 // result as a Value owned by the caller.
 // NOTE(review): the return value on a parse failure is not visible from this
 // header (presumably NULL, with the error information set) -- confirm
 // against the implementation.
51 Value* Parse(const StringPiece& input);
52
53 // Returns the error code. Error information is set through ReportError().
54 JSONReader::JsonParseError error_code() const;
55
56 // Returns the human-friendly error message.
 // NOTE(review): presumably built with FormatErrorMessage() from
 // |error_line_| and |error_column_| -- confirm against the implementation.
57 std::string GetErrorMessage() const;
58
59 private:
 // The kinds of token the parser distinguishes. Where a token corresponds to
 // fixed literal text, that text is shown in the trailing comment.
60 enum Token {
61 T_OBJECT_BEGIN, // {
62 T_OBJECT_END, // }
63 T_ARRAY_BEGIN, // [
64 T_ARRAY_END, // ]
65 T_STRING,
66 T_NUMBER,
67 T_BOOL_TRUE, // true
68 T_BOOL_FALSE, // false
69 T_NULL, // null
70 T_LIST_SEPARATOR, // ,
71 T_OBJECT_PAIR_SEPARATOR, // :
72 T_END_OF_INPUT,
73 T_INVALID_TOKEN,
74 };
75
76 // A helper class used for parsing strings. One optimization performed is to
77 // create base::Value with a StringPiece to avoid unnecessary std::string
78 // copies. This is not possible if the input string needs to be decoded from
79 // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
80 // This class centralizes that logic.
81 class StringBuilder {
82 public:
83 // Empty constructor. Used for creating a builder with which to Swap().
84 StringBuilder();
85
86 // |pos| is the beginning of an input string, excluding the |"|.
87 explicit StringBuilder(const char* pos);
88
89 ~StringBuilder();
90
91 // Swaps the contents of |other| with this.
92 void Swap(StringBuilder* other);
93
94 // Either increases the |length_| of the string or copies the character if
95 // the StringBuilder has been converted. |c| must be in the basic ASCII
96 // plane; all other characters need to be in UTF-8 units, appended with
97 // AppendString below.
98 void Append(const char& c);
99
100 // Appends a string to the std::string. The builder must have been
 // Convert()ed before this is used.
101 void AppendString(const std::string& str);
102
103 // Converts the builder from its default StringPiece to a full std::string,
104 // performing a copy. Once a builder is converted, it cannot be made a
105 // StringPiece again.
106 void Convert();
107
108 // Returns whether the builder can be converted to a StringPiece.
109 bool CanBeStringPiece() const;
110
111 // Returns the StringPiece representation. Returns an empty piece if it
112 // cannot be converted.
113 StringPiece AsStringPiece();
114
115 // Returns the builder as a std::string.
 // NOTE(review): presumably requires Convert() to have been called first,
 // since |string_| is NULL until then -- confirm against the implementation.
116 const std::string& AsString();
117
118 private:
119 // The beginning of the input string.
120 const char* pos_;
121
122 // Number of bytes in |pos_| that make up the string being built.
123 size_t length_;
124
125 // The copied string representation. NULL until Convert() is called.
126 // Strong (i.e. owned by this builder); a raw pointer is used because
 // scoped_ptr<T> has too much of an overhead here.
127 std::string* string_;
128 };
129
130 // Quick check that the stream has capacity to consume |length| more bytes.
131 bool CanConsume(int length);
132
133 // The basic way to consume a single character in the stream. Consumes one
134 // byte of the input stream and returns a pointer to the rest of it.
135 const char* NextChar();
136
137 // Performs the equivalent of NextChar N times.
138 void NextNChars(int n);
139
140 // Skips over whitespace and comments to find the next token in the stream.
141 // This does not advance the parser for non-whitespace or comment chars.
142 Token GetNextToken();
143
144 // Consumes whitespace characters and comments until a character that is
145 // neither is encountered.
146 void EatWhitespaceAndComments();
147 // Helper function that consumes a comment, assuming that the parser is
148 // currently wound to a '/'.
149 bool EatComment();
150
151 // Calls GetNextToken() and then ParseToken(). Caller owns the result.
152 Value* ParseNextToken();
153
154 // Takes a token that represents the start of a Value ("a structural token"
155 // in RFC terms) and consumes it, returning the result as an object the
156 // caller owns.
157 Value* ParseToken(Token token);
158
159 // Assuming that the parser is currently wound to '{', this parses a JSON
160 // object into a DictionaryValue.
161 Value* ConsumeDictionary();
162
163 // Assuming that the parser is wound to '[', this parses a JSON list into a
164 // ListValue.
165 Value* ConsumeList();
166
167 // Calls through ConsumeStringRaw and wraps it in a value.
168 Value* ConsumeString();
169
170 // Assuming that the parser is wound to a double quote, this parses a string,
171 // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
172 // success and Swap()s the result into |out|. Returns false on failure with
173 // error information set.
174 bool ConsumeStringRaw(StringBuilder* out);
175 // Helper function for ConsumeStringRaw() that consumes the next four or ten
176 // bytes (parser is wound to the first character of a HEX sequence, with the
177 // potential for consuming another \uXXXX for a surrogate). Returns true on
178 // success and places the UTF8 code units in |dest_string|, and false on
179 // failure.
180 bool DecodeUTF16(std::string* dest_string);
181 // Helper function for ConsumeStringRaw() that takes a single code point,
182 // converts it into UTF-8 code units, and appends them to the given builder.
183 // The point must be valid.
184 void DecodeUTF8(const int32& point, StringBuilder* dest);
185
186 // Assuming that the parser is wound to the start of a valid JSON number,
187 // this parses and converts it to either an int or double value.
188 Value* ConsumeNumber();
189 // Helper that reads characters that are ints. Returns true if a number was
190 // read and false on error.
191 bool ReadInt(bool allow_leading_zeros);
192
193 // Consumes the literal values of |true|, |false|, and |null|, assuming the
194 // parser is wound to the first character of any of those.
195 Value* ConsumeLiteral();
196
197 // Compares two string buffers of a given length.
198 static bool StringsAreEqual(const char* left, const char* right, size_t len);
199
200 // Sets the error information to |code| at the current column, based on
201 // |index_| and |index_last_line_|, with an optional positive/negative
202 // adjustment by |column_adjust|.
203 void ReportError(JSONReader::JsonParseError code, int column_adjust);
204
205 // Given the line and column number of an error, formats one of the error
206 // message constants from json_reader.h for human display.
207 static std::string FormatErrorMessage(int line, int column,
208 const std::string& description);
209
210 // base::JSONParserOptions that control parsing.
211 int options_;
212
213 // Pointer to the start of the input data.
214 const char* start_pos_;
215
216 // Pointer to the current position in the input data. Equivalent to
217 // |start_pos_ + index_|.
218 const char* pos_;
219
220 // Pointer to the last character of the input data.
221 const char* end_pos_;
222
223 // The index in the input stream to which the parser is wound.
224 int index_;
225
226 // The number of times the parser has recursed (current stack depth).
227 int stack_depth_;
228
229 // The line number that the parser is at currently.
230 int line_number_;
231
232 // The last value of |index_| on the previous line.
233 int index_last_line_;
234
235 // Error information: the error code plus the line and column it refers to.
236 JSONReader::JsonParseError error_code_;
237 int error_line_;
238 int error_column_;
239
 // The test class and the individual tests below are granted access to the
 // private members declared above.
240 friend class JSONParserTest;
241 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
242 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
243 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
244 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
245 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
246 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
247 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
248
249 DISALLOW_COPY_AND_ASSIGN(JSONParser);
250};
251
252} // namespace internal
253} // namespace base
254
255#endif // BASE_JSON_JSON_PARSER_H_