blob: b5463fb3c7cad17d9ffdd68ff9c38e7f32384295 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/strings/string_number_conversions.h"
6
7#include <ctype.h>
8#include <errno.h>
9#include <stdlib.h>
10#include <wctype.h>
11
12#include <limits>
13
14#include "base/logging.h"
15#include "base/scoped_clear_errno.h"
Vitaly Bukacbed2062015-08-17 12:54:05 -070016#include "base/third_party/dmg_fp/dmg_fp.h"
17
18namespace base {
19
20namespace {
21
// Converts an integer of type INT into its base-10 textual form, returned as
// a string of type STR. UINT is the unsigned type used to hold the magnitude
// of the value, and NEG states whether INT is a signed type (only then can a
// leading '-' be produced).
template <typename STR, typename INT, typename UINT, bool NEG>
struct IntToStringT {
  // This is to avoid a compiler warning about unary minus on unsigned type.
  // For example, say you had the following code:
  //   template <typename INT>
  //   INT abs(INT value) { return value < 0 ? -value : value; }
  // Even though if INT is unsigned, it's impossible for value < 0, so the
  // unary minus will never be taken, the compiler will still generate a
  // warning.  We do a little specialization dance...
  template <typename INT2, typename UINT2, bool NEG2>
  struct ToUnsignedT {};

  // Unsigned source type: the value already is its own magnitude.
  template <typename INT2, typename UINT2>
  struct ToUnsignedT<INT2, UINT2, false> {
    static UINT2 ToUnsigned(INT2 value) {
      return static_cast<UINT2>(value);
    }
  };

  // Signed source type: returns the absolute value as an unsigned number.
  template <typename INT2, typename UINT2>
  struct ToUnsignedT<INT2, UINT2, true> {
    static UINT2 ToUnsigned(INT2 value) {
      // Negate in the unsigned domain. Evaluating -value on the signed type
      // overflows (undefined behaviour) for the most negative value, whereas
      // unsigned subtraction wraps and yields exactly the magnitude.
      const UINT2 as_unsigned = static_cast<UINT2>(value);
      return value < 0 ? static_cast<UINT2>(0 - as_unsigned) : as_unsigned;
    }
  };

  // This set of templates is very similar to the above templates, but
  // for testing whether an integer is negative.
  template <typename INT2, bool NEG2>
  struct TestNegT {};
  template <typename INT2>
  struct TestNegT<INT2, false> {
    static bool TestNeg(INT2 value) {
      // value is unsigned, and can never be negative.
      return false;
    }
  };
  template <typename INT2>
  struct TestNegT<INT2, true> {
    static bool TestNeg(INT2 value) {
      return value < 0;
    }
  };

  // Returns the decimal representation of |value|, prefixed with '-' when
  // |value| is negative.
  static STR IntToString(INT value) {
    // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
    // So round up to allocate 3 output characters per byte, plus 1 for '-'.
    const int kOutputBufSize = 3 * sizeof(INT) + 1;

    // Allocate the whole string right away, we will write back to front, and
    // then return the substr of what we ended up using.
    STR outbuf(kOutputBufSize, 0);

    bool is_neg = TestNegT<INT, NEG>::TestNeg(value);
    // Even though is_neg will never be true when INT is parameterized as
    // unsigned, even the presence of the unary operation causes a warning.
    UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);

    // Emit digits from least to most significant, moving towards the front
    // of the buffer. The do/while guarantees that zero still produces "0".
    typename STR::iterator it(outbuf.end());
    do {
      --it;
      DCHECK(it != outbuf.begin());
      *it = static_cast<typename STR::value_type>((res % 10) + '0');
      res /= 10;
    } while (res != 0);
    if (is_neg) {
      --it;
      DCHECK(it != outbuf.begin());
      *it = static_cast<typename STR::value_type>('-');
    }
    return STR(it, outbuf.end());
  }
};
95
96// Utility to convert a character to a digit in a given base
97template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit {
98};
99
100// Faster specialization for bases <= 10
101template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> {
102 public:
103 static bool Convert(CHAR c, uint8* digit) {
104 if (c >= '0' && c < '0' + BASE) {
105 *digit = static_cast<uint8>(c - '0');
106 return true;
107 }
108 return false;
109 }
110};
111
112// Specialization for bases where 10 < base <= 36
113template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> {
114 public:
115 static bool Convert(CHAR c, uint8* digit) {
116 if (c >= '0' && c <= '9') {
117 *digit = c - '0';
118 } else if (c >= 'a' && c < 'a' + BASE - 10) {
119 *digit = c - 'a' + 10;
120 } else if (c >= 'A' && c < 'A' + BASE - 10) {
121 *digit = c - 'A' + 10;
122 } else {
123 return false;
124 }
125 return true;
126 }
127};
128
129template<int BASE, typename CHAR> bool CharToDigit(CHAR c, uint8* digit) {
130 return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
131}
132
// There is an IsWhitespace for wchars defined in string_util.h, but it is
// locale independent, whereas the functions we are replacing were
// locale-dependent. TBD what is desired, but for the moment let's not introduce
// a change in behaviour.
//
// The primary template is empty; only character types with an explicit
// specialization below can be classified.
template<typename CHAR> class WhitespaceHelper {
};

template<> class WhitespaceHelper<char> {
 public:
  // Returns true when isspace() reports |c| as whitespace. The character is
  // passed as unsigned char so that negative char values are never handed to
  // isspace().
  static bool Invoke(char c) {
    const unsigned char as_unsigned = static_cast<unsigned char>(c);
    return isspace(as_unsigned) != 0;
  }
};
146
Vitaly Bukacbed2062015-08-17 12:54:05 -0700147template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
148 return WhitespaceHelper<CHAR>::Invoke(c);
149}
150
151// IteratorRangeToNumberTraits should provide:
152// - a typedef for iterator_type, the iterator type used as input.
153// - a typedef for value_type, the target numeric type.
154// - static functions min, max (returning the minimum and maximum permitted
155// values)
156// - constant kBase, the base in which to interpret the input
157template<typename IteratorRangeToNumberTraits>
158class IteratorRangeToNumber {
159 public:
160 typedef IteratorRangeToNumberTraits traits;
161 typedef typename traits::iterator_type const_iterator;
162 typedef typename traits::value_type value_type;
163
164 // Generalized iterator-range-to-number conversion.
165 //
166 static bool Invoke(const_iterator begin,
167 const_iterator end,
168 value_type* output) {
169 bool valid = true;
170
171 while (begin != end && LocalIsWhitespace(*begin)) {
172 valid = false;
173 ++begin;
174 }
175
176 if (begin != end && *begin == '-') {
177 if (!std::numeric_limits<value_type>::is_signed) {
178 valid = false;
179 } else if (!Negative::Invoke(begin + 1, end, output)) {
180 valid = false;
181 }
182 } else {
183 if (begin != end && *begin == '+') {
184 ++begin;
185 }
186 if (!Positive::Invoke(begin, end, output)) {
187 valid = false;
188 }
189 }
190
191 return valid;
192 }
193
194 private:
195 // Sign provides:
196 // - a static function, CheckBounds, that determines whether the next digit
197 // causes an overflow/underflow
198 // - a static function, Increment, that appends the next digit appropriately
199 // according to the sign of the number being parsed.
200 template<typename Sign>
201 class Base {
202 public:
203 static bool Invoke(const_iterator begin, const_iterator end,
204 typename traits::value_type* output) {
205 *output = 0;
206
207 if (begin == end) {
208 return false;
209 }
210
211 // Note: no performance difference was found when using template
212 // specialization to remove this check in bases other than 16
213 if (traits::kBase == 16 && end - begin > 2 && *begin == '0' &&
214 (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
215 begin += 2;
216 }
217
218 for (const_iterator current = begin; current != end; ++current) {
219 uint8 new_digit = 0;
220
221 if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
222 return false;
223 }
224
225 if (current != begin) {
226 if (!Sign::CheckBounds(output, new_digit)) {
227 return false;
228 }
229 *output *= traits::kBase;
230 }
231
232 Sign::Increment(new_digit, output);
233 }
234 return true;
235 }
236 };
237
238 class Positive : public Base<Positive> {
239 public:
240 static bool CheckBounds(value_type* output, uint8 new_digit) {
241 if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
242 (*output == static_cast<value_type>(traits::max() / traits::kBase) &&
243 new_digit > traits::max() % traits::kBase)) {
244 *output = traits::max();
245 return false;
246 }
247 return true;
248 }
249 static void Increment(uint8 increment, value_type* output) {
250 *output += increment;
251 }
252 };
253
254 class Negative : public Base<Negative> {
255 public:
256 static bool CheckBounds(value_type* output, uint8 new_digit) {
257 if (*output < traits::min() / traits::kBase ||
258 (*output == traits::min() / traits::kBase &&
259 new_digit > 0 - traits::min() % traits::kBase)) {
260 *output = traits::min();
261 return false;
262 }
263 return true;
264 }
265 static void Increment(uint8 increment, value_type* output) {
266 *output -= increment;
267 }
268 };
269};
270
// Traits bundle for IteratorRangeToNumber: names the input iterator type,
// the numeric result type, the numeric base, and the representable range of
// the result type as reported by std::numeric_limits.
template<typename ITERATOR, typename VALUE, int BASE>
class BaseIteratorRangeToNumberTraits {
 public:
  typedef VALUE value_type;
  typedef ITERATOR iterator_type;

  static const int kBase = BASE;

  // Largest value that value_type can hold.
  static value_type max() {
    return std::numeric_limits<VALUE>::max();
  }

  // Smallest value that value_type can hold.
  static value_type min() {
    return std::numeric_limits<VALUE>::min();
  }
};
284
285template<typename ITERATOR>
286class BaseHexIteratorRangeToIntTraits
287 : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
288};
289
290template<typename ITERATOR>
291class BaseHexIteratorRangeToUIntTraits
292 : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32, 16> {
293};
294
295template<typename ITERATOR>
296class BaseHexIteratorRangeToInt64Traits
297 : public BaseIteratorRangeToNumberTraits<ITERATOR, int64, 16> {
298};
299
300template<typename ITERATOR>
301class BaseHexIteratorRangeToUInt64Traits
302 : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64, 16> {
303};
304
305typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator>
306 HexIteratorRangeToIntTraits;
307
308typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator>
309 HexIteratorRangeToUIntTraits;
310
311typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator>
312 HexIteratorRangeToInt64Traits;
313
314typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator>
315 HexIteratorRangeToUInt64Traits;
316
317template<typename STR>
318bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
319 DCHECK_EQ(output->size(), 0u);
320 size_t count = input.size();
321 if (count == 0 || (count % 2) != 0)
322 return false;
323 for (uintptr_t i = 0; i < count / 2; ++i) {
324 uint8 msb = 0; // most significant 4 bits
325 uint8 lsb = 0; // least significant 4 bits
326 if (!CharToDigit<16>(input[i * 2], &msb) ||
327 !CharToDigit<16>(input[i * 2 + 1], &lsb))
328 return false;
329 output->push_back((msb << 4) | lsb);
330 }
331 return true;
332}
333
334template <typename VALUE, int BASE>
335class StringPieceToNumberTraits
336 : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator,
337 VALUE,
338 BASE> {
339};
340
341template <typename VALUE>
342bool StringToIntImpl(const StringPiece& input, VALUE* output) {
343 return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke(
344 input.begin(), input.end(), output);
345}
346
Vitaly Bukacbed2062015-08-17 12:54:05 -0700347} // namespace
348
349std::string IntToString(int value) {
350 return IntToStringT<std::string, int, unsigned int, true>::
351 IntToString(value);
352}
353
Vitaly Bukacbed2062015-08-17 12:54:05 -0700354std::string UintToString(unsigned int value) {
355 return IntToStringT<std::string, unsigned int, unsigned int, false>::
356 IntToString(value);
357}
358
Vitaly Bukacbed2062015-08-17 12:54:05 -0700359std::string Int64ToString(int64 value) {
360 return IntToStringT<std::string, int64, uint64, true>::IntToString(value);
361}
362
Vitaly Bukacbed2062015-08-17 12:54:05 -0700363std::string Uint64ToString(uint64 value) {
364 return IntToStringT<std::string, uint64, uint64, false>::IntToString(value);
365}
366
Vitaly Bukacbed2062015-08-17 12:54:05 -0700367std::string SizeTToString(size_t value) {
368 return IntToStringT<std::string, size_t, size_t, false>::IntToString(value);
369}
370
Vitaly Bukacbed2062015-08-17 12:54:05 -0700371std::string DoubleToString(double value) {
372 // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
373 char buffer[32];
374 dmg_fp::g_fmt(buffer, value);
375 return std::string(buffer);
376}
377
378bool StringToInt(const StringPiece& input, int* output) {
379 return StringToIntImpl(input, output);
380}
381
Vitaly Bukacbed2062015-08-17 12:54:05 -0700382bool StringToUint(const StringPiece& input, unsigned* output) {
383 return StringToIntImpl(input, output);
384}
385
Vitaly Bukacbed2062015-08-17 12:54:05 -0700386bool StringToInt64(const StringPiece& input, int64* output) {
387 return StringToIntImpl(input, output);
388}
389
Vitaly Bukacbed2062015-08-17 12:54:05 -0700390bool StringToUint64(const StringPiece& input, uint64* output) {
391 return StringToIntImpl(input, output);
392}
393
Vitaly Bukacbed2062015-08-17 12:54:05 -0700394bool StringToSizeT(const StringPiece& input, size_t* output) {
395 return StringToIntImpl(input, output);
396}
397
Vitaly Bukacbed2062015-08-17 12:54:05 -0700398bool StringToDouble(const std::string& input, double* output) {
399 // Thread-safe? It is on at least Mac, Linux, and Windows.
400 ScopedClearErrno clear_errno;
401
402 char* endptr = NULL;
403 *output = dmg_fp::strtod(input.c_str(), &endptr);
404
405 // Cases to return false:
406 // - If errno is ERANGE, there was an overflow or underflow.
407 // - If the input string is empty, there was nothing to parse.
408 // - If endptr does not point to the end of the string, there are either
409 // characters remaining in the string after a parsed number, or the string
410 // does not begin with a parseable number. endptr is compared to the
411 // expected end given the string's stated length to correctly catch cases
412 // where the string contains embedded NUL characters.
413 // - If the first character is a space, there was leading whitespace
414 return errno == 0 &&
415 !input.empty() &&
416 input.c_str() + input.length() == endptr &&
417 !isspace(input[0]);
418}
419
// Note: if you need to add String16ToDouble, first ask yourself if it's
// really necessary. If it is, probably the best implementation here is to
// convert to 8-bit and then use the 8-bit version.

// Note: if you need to add an iterator range version of StringToDouble, first
// ask yourself if it's really necessary. If it is, probably the best
// implementation here is to instantiate a string and use the string version.
427
// Returns the |size| bytes starting at |bytes| as a string of uppercase
// hexadecimal digits, two characters per input byte.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  const char* const data = reinterpret_cast<const char*>(bytes);

  // Pre-size the result (two characters per byte) and fill it in order.
  std::string encoded(size * 2, '\0');
  std::string::iterator out = encoded.begin();
  for (size_t index = 0; index != size; ++index) {
    const char byte = data[index];
    // Masking after the shift discards any sign-extension bits.
    *out++ = kHexChars[(byte >> 4) & 0xf];
    *out++ = kHexChars[byte & 0xf];
  }
  return encoded;
}
441
442bool HexStringToInt(const StringPiece& input, int* output) {
443 return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
444 input.begin(), input.end(), output);
445}
446
447bool HexStringToUInt(const StringPiece& input, uint32* output) {
448 return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke(
449 input.begin(), input.end(), output);
450}
451
452bool HexStringToInt64(const StringPiece& input, int64* output) {
453 return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke(
454 input.begin(), input.end(), output);
455}
456
457bool HexStringToUInt64(const StringPiece& input, uint64* output) {
458 return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke(
459 input.begin(), input.end(), output);
460}
461
462bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
463 return HexStringToBytesT(input, output);
464}
465
466} // namespace base