Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | // |
| 5 | // This file defines utility functions for working with strings. |
| 6 | |
| 7 | #ifndef BASE_STRINGS_STRING_UTIL_H_ |
| 8 | #define BASE_STRINGS_STRING_UTIL_H_ |
| 9 | |
| 10 | #include <ctype.h> |
| 11 | #include <stdarg.h> // va_list |
| 12 | |
| 13 | #include <string> |
| 14 | #include <vector> |
| 15 | |
| 16 | #include "base/base_export.h" |
| 17 | #include "base/basictypes.h" |
| 18 | #include "base/compiler_specific.h" |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 19 | #include "base/strings/string_piece.h" // For implicit conversions. |
| 20 | |
| 21 | // On Android, bionic's stdio.h defines an snprintf macro when being built with |
| 22 | // clang. Undefine it here so it won't collide with base::snprintf(). |
| 23 | #undef snprintf |
| 24 | |
| 25 | namespace base { |
| 26 | |
| 27 | // C standard-library functions like "strncasecmp" and "snprintf" that aren't |
| 28 | // cross-platform are provided as "base::strncasecmp", and their prototypes |
| 29 | // are listed below. These functions are then implemented as inline calls |
| 30 | // to the platform-specific equivalents in the platform-specific headers. |
| 31 | |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 32 | // Wrapper for vsnprintf that always null-terminates and always returns the |
| 33 | // number of characters that would be in an untruncated formatted |
| 34 | // string, even when truncation occurs. |
| 35 | int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) |
| 36 | PRINTF_FORMAT(3, 0); |
| 37 | |
| 38 | // Some of these implementations need to be inlined. |
| 39 | |
| 40 | // We separate the declaration from the implementation of this inline |
| 41 | // function just so the PRINTF_FORMAT works. |
| 42 | inline int snprintf(char* buffer, size_t size, const char* format, ...) |
| 43 | PRINTF_FORMAT(3, 4); |
| 44 | inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
| 45 | va_list arguments; |
| 46 | va_start(arguments, format); |
| 47 | int result = vsnprintf(buffer, size, format, arguments); |
| 48 | va_end(arguments); |
| 49 | return result; |
| 50 | } |
| 51 | |
// Null-terminated set of the characters that represent whitespace in ASCII.
// These are the whitespace characters as defined by HTML5; control characters
// are not included.
extern const char kWhitespaceASCII[];
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 56 | |
| 57 | // Replaces characters in |replace_chars| from anywhere in |input| with |
| 58 | // |replace_with|. Each character in |replace_chars| will be replaced with |
| 59 | // the |replace_with| string. Returns true if any characters were replaced. |
| 60 | // |replace_chars| must be null-terminated. |
| 61 | // NOTE: Safe to use the same variable for both |input| and |output|. |
Vitaly Buka | 60b8f00 | 2015-08-20 13:47:48 -0700 | [diff] [blame] | 62 | bool ReplaceChars(const std::string& input, |
| 63 | const base::StringPiece& replace_chars, |
| 64 | const std::string& replace_with, |
| 65 | std::string* output); |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 66 | |
// Bit flags naming the end(s) of a string. The trimming functions declared in
// this header use this type both as a parameter and as a return value.
enum TrimPositions {
  TRIM_NONE = 0x0,
  TRIM_LEADING = 0x1,   // Bit 0.
  TRIM_TRAILING = 0x2,  // Bit 1.
  TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,  // Both bits set.
};
| 73 | |
| 74 | // Removes characters in |trim_chars| from the beginning and end of |input|. |
| 75 | // The 8-bit version only works on 8-bit characters, not UTF-8. |
| 76 | // |
| 77 | // It is safe to use the same variable for both |input| and |output| (this is |
| 78 | // the normal usage to trim in-place). |
Vitaly Buka | 60b8f00 | 2015-08-20 13:47:48 -0700 | [diff] [blame] | 79 | bool TrimString(const std::string& input, |
| 80 | base::StringPiece trim_chars, |
| 81 | std::string* output); |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 82 | |
| 83 | // StringPiece versions of the above. The returned pieces refer to the original |
| 84 | // buffer. |
Vitaly Buka | 60b8f00 | 2015-08-20 13:47:48 -0700 | [diff] [blame] | 85 | StringPiece TrimString(StringPiece input, |
| 86 | const base::StringPiece& trim_chars, |
| 87 | TrimPositions positions); |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 88 | |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 89 | // Trims any whitespace from either end of the input string. Returns where |
| 90 | // whitespace was found. |
| 91 | // The non-wide version has two functions: |
| 92 | // * TrimWhitespaceASCII() |
| 93 | // This function is for ASCII strings and only looks for ASCII whitespace; |
| 94 | // Please choose the best one according to your usage. |
| 95 | // NOTE: Safe to use the same variable for both input and output. |
Vitaly Buka | 60b8f00 | 2015-08-20 13:47:48 -0700 | [diff] [blame] | 96 | TrimPositions TrimWhitespaceASCII(const std::string& input, |
| 97 | TrimPositions positions, |
| 98 | std::string* output); |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 99 | |
| 100 | // Deprecated. This function is only for backward compatibility and calls |
| 101 | // TrimWhitespaceASCII(). |
Vitaly Buka | 60b8f00 | 2015-08-20 13:47:48 -0700 | [diff] [blame] | 102 | TrimPositions TrimWhitespace(const std::string& input, |
| 103 | TrimPositions positions, |
| 104 | std::string* output); |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 105 | |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 106 | // Returns true if the specified string matches the criteria. How can a wide |
| 107 | // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
| 108 | // first case) or characters that use only 8-bits and whose 8-bit |
| 109 | // representation looks like a UTF-8 string (the second case). |
| 110 | // |
| 111 | // Note that IsStringUTF8 checks not only if the input is structurally |
| 112 | // valid but also if it doesn't contain any non-character codepoint |
| 113 | // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
| 114 | // to have the maximum 'discriminating' power from other encodings. If |
| 115 | // there's a use case for just checking the structural validity, we have to |
| 116 | // add a new function for that. |
| 117 | // |
| 118 | // IsStringASCII assumes the input is likely all ASCII, and does not leave early |
| 119 | // if it is not the case. |
Vitaly Buka | 60b8f00 | 2015-08-20 13:47:48 -0700 | [diff] [blame] | 120 | bool IsStringUTF8(const StringPiece& str); |
| 121 | bool IsStringASCII(const StringPiece& str); |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 122 | |
| 123 | } // namespace base |
| 124 | |
| 125 | #if defined(OS_WIN) |
| 126 | #include "base/strings/string_util_win.h" |
| 127 | #elif defined(OS_POSIX) |
| 128 | #include "base/strings/string_util_posix.h" |
| 129 | #else |
| 130 | #error Define string operations appropriately for your platform |
| 131 | #endif |
| 132 | |
Vitaly Buka | cbed206 | 2015-08-17 12:54:05 -0700 | [diff] [blame] | 133 | #endif // BASE_STRINGS_STRING_UTIL_H_ |