blob: 7ba612fc9ba89de4014e0ac9d4e067f35b9d786d [file] [log] [blame]
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/json/string_escape.h"
6
7#include <string>
8
Vitaly Buka8750b272015-08-18 18:39:08 -07009#include "base/logging.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070010#include "base/strings/string_util.h"
11#include "base/strings/stringprintf.h"
12#include "base/strings/utf_string_conversion_utils.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070013#include "base/third_party/icu/icu_utf.h"
14
15namespace base {
16
17namespace {
18
// printf-style format string for printing a \uXXXX escape sequence: a literal
// backslash and 'u' followed by the value as four zero-padded, upper-case
// hexadecimal digits.
const char kU16EscapeFormat[] = "\\u%04X";

// The code point to output for an invalid input code unit (U+FFFD).
const uint32 kReplacementCodePoint = 0xFFFD;

// Used below in EscapeSpecialCodePoint(), which writes the hard-coded text
// "\u003C" for '<'. This assertion guarantees at compile time that the
// character literal '<' really has the value 0x3C.
COMPILE_ASSERT('<' == 0x3C, less_than_sign_is_0x3c);
27
// Try to escape the |code_point| if it is a known special character. If
// successful, returns true and appends the escape sequence to |dest|. If the
// code point is not special, returns false and leaves |dest| untouched so the
// caller can decide how to emit it.
//
// The short escapes (\b, \f, \n, \r, \t) aren't required by the spec, but
// they are more readable by humans than the equivalent \u00XX form.
bool EscapeSpecialCodePoint(uint32 code_point, std::string* dest) {
  // WARNING: if you add a new case here, you need to update the reader as well.
  // Note: \v is in the reader, but not here since the JSON spec doesn't
  // allow it.
  switch (code_point) {
    case '\b':
      dest->append("\\b");
      break;
    case '\f':
      dest->append("\\f");
      break;
    case '\n':
      dest->append("\\n");
      break;
    case '\r':
      dest->append("\\r");
      break;
    case '\t':
      dest->append("\\t");
      break;
    case '\\':
      dest->append("\\\\");
      break;
    case '"':
      dest->append("\\\"");
      break;
    // Escape < to prevent script execution; escaping > is not necessary and
    // not doing so saves a few bytes.
    case '<':
      dest->append("\\u003C");
      break;
    // Escape the "Line Separator" (U+2028) and "Paragraph Separator" (U+2029)
    // characters. They are legal inside a JSON string, but JavaScript engines
    // that predate ES2019 treat them as line terminators, so emitting them raw
    // breaks (or allows tampering with) JSON that gets embedded in a script.
    // They use the same generic \uXXXX form as '<' above, so no new escape
    // syntax is introduced for the reader.
    case 0x2028:
      dest->append("\\u2028");
      break;
    case 0x2029:
      dest->append("\\u2029");
      break;
    default:
      return false;
  }
  return true;
}
67
68template <typename S>
69bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
70 bool did_replacement = false;
71
72 if (put_in_quotes)
73 dest->push_back('"');
74
75 // Casting is necessary because ICU uses int32. Try and do so safely.
76 CHECK_LE(str.length(), static_cast<size_t>(kint32max));
77 const int32 length = static_cast<int32>(str.length());
78
79 for (int32 i = 0; i < length; ++i) {
80 uint32 code_point;
81 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) {
82 code_point = kReplacementCodePoint;
83 did_replacement = true;
84 }
85
86 if (EscapeSpecialCodePoint(code_point, dest))
87 continue;
88
89 // Escape non-printing characters.
90 if (code_point < 32)
91 base::StringAppendF(dest, kU16EscapeFormat, code_point);
92 else
93 WriteUnicodeCharacter(code_point, dest);
94 }
95
96 if (put_in_quotes)
97 dest->push_back('"');
98
99 return !did_replacement;
100}
101
102} // namespace
103
104bool EscapeJSONString(const StringPiece& str,
105 bool put_in_quotes,
106 std::string* dest) {
107 return EscapeJSONStringImpl(str, put_in_quotes, dest);
108}
109
Vitaly Bukacbed2062015-08-17 12:54:05 -0700110std::string GetQuotedJSONString(const StringPiece& str) {
111 std::string dest;
112 bool ok = EscapeJSONStringImpl(str, true, &dest);
113 DCHECK(ok);
114 return dest;
115}
116
Vitaly Bukacbed2062015-08-17 12:54:05 -0700117std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
118 bool put_in_quotes) {
119 std::string dest;
120
121 if (put_in_quotes)
122 dest.push_back('"');
123
124 for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
125 unsigned char c = *it;
126 if (EscapeSpecialCodePoint(c, &dest))
127 continue;
128
129 if (c < 32 || c > 126)
130 base::StringAppendF(&dest, kU16EscapeFormat, c);
131 else
132 dest.push_back(*it);
133 }
134
135 if (put_in_quotes)
136 dest.push_back('"');
137
138 return dest;
139}
140
141} // namespace base