// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5#ifndef BASE_JSON_JSON_PARSER_H_
6#define BASE_JSON_JSON_PARSER_H_
7
Alex Vakulenko674f0eb2016-01-20 08:10:48 -08008#include <stddef.h>
9#include <stdint.h>
10
Vitaly Bukacbed2062015-08-17 12:54:05 -070011#include <string>
12
13#include "base/base_export.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070014#include "base/compiler_specific.h"
Vitaly Buka8750b272015-08-18 18:39:08 -070015#include "base/gtest_prod_util.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070016#include "base/json/json_reader.h"
Alex Vakulenko674f0eb2016-01-20 08:10:48 -080017#include "base/macros.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070018#include "base/strings/string_piece.h"
19
Vitaly Bukacbed2062015-08-17 12:54:05 -070020namespace base {
Vitaly Bukacbed2062015-08-17 12:54:05 -070021
Alex Vakulenko674f0eb2016-01-20 08:10:48 -080022class Value;
23
Vitaly Bukacbed2062015-08-17 12:54:05 -070024namespace internal {
25
26class JSONParserTest;
27
// The implementation behind the JSONReader interface. This class is not meant
// to be used directly; it encapsulates logic that need not be exposed publicly.
//
// This parser guarantees O(n) time through the input string. It also optimizes
// base::StringValue by using StringPiece where possible when returning Value
// objects by using "hidden roots," discussed in the implementation.
//
// Iteration happens on the byte level, with the functions CanConsume and
// NextChar. The conversion from byte to JSON token happens without advancing
// the parser in GetNextToken/ParseToken; that is, tokenization operates on
// the current parser position without advancing.
//
// Built on top of these are a family of Consume functions that iterate
// internally. Invariant: on entry of a Consume function, the parser is wound
// to the first byte of a valid JSON token. On exit, it is on the last byte
// of a token, such that the next iteration of the parser will be at the byte
// immediately following the token, which would likely be the first byte of the
// next token.
Alex Vakulenko674f0eb2016-01-20 08:10:48 -080046class BASE_EXPORT JSONParser {
Vitaly Bukacbed2062015-08-17 12:54:05 -070047 public:
48 explicit JSONParser(int options);
49 ~JSONParser();
50
51 // Parses the input string according to the set options and returns the
52 // result as a Value owned by the caller.
53 Value* Parse(const StringPiece& input);
54
55 // Returns the error code.
56 JSONReader::JsonParseError error_code() const;
57
58 // Returns the human-friendly error message.
59 std::string GetErrorMessage() const;
60
Alex Vakulenko674f0eb2016-01-20 08:10:48 -080061 // Returns the error line number if parse error happened. Otherwise always
62 // returns 0.
63 int error_line() const;
64
65 // Returns the error column number if parse error happened. Otherwise always
66 // returns 0.
67 int error_column() const;
68
Vitaly Bukacbed2062015-08-17 12:54:05 -070069 private:
70 enum Token {
71 T_OBJECT_BEGIN, // {
72 T_OBJECT_END, // }
73 T_ARRAY_BEGIN, // [
74 T_ARRAY_END, // ]
75 T_STRING,
76 T_NUMBER,
77 T_BOOL_TRUE, // true
78 T_BOOL_FALSE, // false
79 T_NULL, // null
80 T_LIST_SEPARATOR, // ,
81 T_OBJECT_PAIR_SEPARATOR, // :
82 T_END_OF_INPUT,
83 T_INVALID_TOKEN,
84 };
85
86 // A helper class used for parsing strings. One optimization performed is to
87 // create base::Value with a StringPiece to avoid unnecessary std::string
88 // copies. This is not possible if the input string needs to be decoded from
89 // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
90 // This class centralizes that logic.
91 class StringBuilder {
92 public:
93 // Empty constructor. Used for creating a builder with which to Swap().
94 StringBuilder();
95
96 // |pos| is the beginning of an input string, excluding the |"|.
97 explicit StringBuilder(const char* pos);
98
99 ~StringBuilder();
100
101 // Swaps the contents of |other| with this.
102 void Swap(StringBuilder* other);
103
104 // Either increases the |length_| of the string or copies the character if
105 // the StringBuilder has been converted. |c| must be in the basic ASCII
106 // plane; all other characters need to be in UTF-8 units, appended with
107 // AppendString below.
108 void Append(const char& c);
109
110 // Appends a string to the std::string. Must be Convert()ed to use.
111 void AppendString(const std::string& str);
112
113 // Converts the builder from its default StringPiece to a full std::string,
114 // performing a copy. Once a builder is converted, it cannot be made a
115 // StringPiece again.
116 void Convert();
117
118 // Returns whether the builder can be converted to a StringPiece.
119 bool CanBeStringPiece() const;
120
121 // Returns the StringPiece representation. Returns an empty piece if it
122 // cannot be converted.
123 StringPiece AsStringPiece();
124
125 // Returns the builder as a std::string.
126 const std::string& AsString();
127
128 private:
129 // The beginning of the input string.
130 const char* pos_;
131
132 // Number of bytes in |pos_| that make up the string being built.
133 size_t length_;
134
135 // The copied string representation. NULL until Convert() is called.
136 // Strong. scoped_ptr<T> has too much of an overhead here.
137 std::string* string_;
138 };
139
140 // Quick check that the stream has capacity to consume |length| more bytes.
141 bool CanConsume(int length);
142
143 // The basic way to consume a single character in the stream. Consumes one
144 // byte of the input stream and returns a pointer to the rest of it.
145 const char* NextChar();
146
147 // Performs the equivalent of NextChar N times.
148 void NextNChars(int n);
149
150 // Skips over whitespace and comments to find the next token in the stream.
151 // This does not advance the parser for non-whitespace or comment chars.
152 Token GetNextToken();
153
154 // Consumes whitespace characters and comments until the next non-that is
155 // encountered.
156 void EatWhitespaceAndComments();
157 // Helper function that consumes a comment, assuming that the parser is
158 // currently wound to a '/'.
159 bool EatComment();
160
161 // Calls GetNextToken() and then ParseToken(). Caller owns the result.
162 Value* ParseNextToken();
163
164 // Takes a token that represents the start of a Value ("a structural token"
165 // in RFC terms) and consumes it, returning the result as an object the
166 // caller owns.
167 Value* ParseToken(Token token);
168
169 // Assuming that the parser is currently wound to '{', this parses a JSON
170 // object into a DictionaryValue.
171 Value* ConsumeDictionary();
172
173 // Assuming that the parser is wound to '[', this parses a JSON list into a
174 // ListValue.
175 Value* ConsumeList();
176
177 // Calls through ConsumeStringRaw and wraps it in a value.
178 Value* ConsumeString();
179
180 // Assuming that the parser is wound to a double quote, this parses a string,
181 // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
182 // success and Swap()s the result into |out|. Returns false on failure with
183 // error information set.
184 bool ConsumeStringRaw(StringBuilder* out);
185 // Helper function for ConsumeStringRaw() that consumes the next four or 10
186 // bytes (parser is wound to the first character of a HEX sequence, with the
187 // potential for consuming another \uXXXX for a surrogate). Returns true on
188 // success and places the UTF8 code units in |dest_string|, and false on
189 // failure.
190 bool DecodeUTF16(std::string* dest_string);
191 // Helper function for ConsumeStringRaw() that takes a single code point,
192 // decodes it into UTF-8 units, and appends it to the given builder. The
193 // point must be valid.
Alex Vakulenko674f0eb2016-01-20 08:10:48 -0800194 void DecodeUTF8(const int32_t& point, StringBuilder* dest);
Vitaly Bukacbed2062015-08-17 12:54:05 -0700195
196 // Assuming that the parser is wound to the start of a valid JSON number,
197 // this parses and converts it to either an int or double value.
198 Value* ConsumeNumber();
199 // Helper that reads characters that are ints. Returns true if a number was
200 // read and false on error.
201 bool ReadInt(bool allow_leading_zeros);
202
203 // Consumes the literal values of |true|, |false|, and |null|, assuming the
204 // parser is wound to the first character of any of those.
205 Value* ConsumeLiteral();
206
207 // Compares two string buffers of a given length.
208 static bool StringsAreEqual(const char* left, const char* right, size_t len);
209
210 // Sets the error information to |code| at the current column, based on
211 // |index_| and |index_last_line_|, with an optional positive/negative
212 // adjustment by |column_adjust|.
213 void ReportError(JSONReader::JsonParseError code, int column_adjust);
214
215 // Given the line and column number of an error, formats one of the error
216 // message contants from json_reader.h for human display.
217 static std::string FormatErrorMessage(int line, int column,
218 const std::string& description);
219
220 // base::JSONParserOptions that control parsing.
221 int options_;
222
223 // Pointer to the start of the input data.
224 const char* start_pos_;
225
226 // Pointer to the current position in the input data. Equivalent to
227 // |start_pos_ + index_|.
228 const char* pos_;
229
230 // Pointer to the last character of the input data.
231 const char* end_pos_;
232
233 // The index in the input stream to which the parser is wound.
234 int index_;
235
236 // The number of times the parser has recursed (current stack depth).
237 int stack_depth_;
238
239 // The line number that the parser is at currently.
240 int line_number_;
241
242 // The last value of |index_| on the previous line.
243 int index_last_line_;
244
245 // Error information.
246 JSONReader::JsonParseError error_code_;
247 int error_line_;
248 int error_column_;
249
250 friend class JSONParserTest;
251 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
252 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
253 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
254 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
255 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
256 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
257 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
258
259 DISALLOW_COPY_AND_ASSIGN(JSONParser);
260};
261
262} // namespace internal
263} // namespace base
264
265#endif // BASE_JSON_JSON_PARSER_H_