Blame - third_party/chromium/base/strings/string_util.h - weave/libweave

blob: f505bb6fbaeb6d4ce8b275b495f7660baec03fb4 [file] [log] [blame]

Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	1	// Copyright 2013 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4	//
				5	// This file defines utility functions for working with strings.
				6
				7	#ifndef BASE_STRINGS_STRING_UTIL_H_
				8	#define BASE_STRINGS_STRING_UTIL_H_
				9
				10	#include <ctype.h>
				11	#include <stdarg.h> // va_list
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	12	#include <stddef.h>
				13	#include <stdint.h>
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	14
				15	#include <string>
				16	#include <vector>
				17
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	18	#include "base/compiler_specific.h"
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	19	#include "base/strings/string_piece.h" // For implicit conversions.
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	20	#include "build/build_config.h"
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	21
				22	// On Android, bionic's stdio.h defines an snprintf macro when being built with
				23	// clang. Undefine it here so it won't collide with base::snprintf().
				24	#undef snprintf
				25
				26	namespace base {
				27
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	28	// C standard-library functions that aren't cross-platform are provided as
				29	// "base::...", and their prototypes are listed below. These functions are
				30	// then implemented as inline calls to the platform-specific equivalents in the
				31	// platform-specific headers.
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	32
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	33	// Wrapper for vsnprintf that always null-terminates and always returns the
				34	// number of characters that would be in an untruncated formatted
				35	// string, even when truncation occurs.
				36	int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
				37	PRINTF_FORMAT(3, 0);
				38
				39	// Some of these implementations need to be inlined.
				40
				41	// We separate the declaration from the implementation of this inline
				42	// function just so the PRINTF_FORMAT works.
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	43	inline int snprintf(char* buffer,
				44	size_t size,
				45	_Printf_format_string_ const char* format,
				46	...) PRINTF_FORMAT(3, 4);
				47	inline int snprintf(char* buffer,
				48	size_t size,
				49	_Printf_format_string_ const char* format,
				50	...) {
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	51	va_list arguments;
				52	va_start(arguments, format);
				53	int result = vsnprintf(buffer, size, format, arguments);
				54	va_end(arguments);
				55	return result;
				56	}
				57
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	58	// BSD-style safe and consistent string copy functions.
				59	// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
				60	// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
				61	// long as |dst_size| is not 0. Returns the length of |src| in characters.
				62	// If the return value is >= dst_size, then the output was truncated.
				63	// NOTE: All sizes are in number of characters, NOT in bytes.
				64	size_t strlcpy(char* dst, const char* src, size_t dst_size);
				65
				66	// ASCII-specific tolower. The standard library's tolower is locale sensitive,
				67	// so we don't want to use it here.
				68	inline char ToLowerASCII(char c) {
				69	return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
				70	}
				71
				72	// ASCII-specific toupper. The standard library's toupper is locale sensitive,
				73	// so we don't want to use it here.
				74	inline char ToUpperASCII(char c) {
				75	return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c;
				76	}
				77	// Converts the given string to its ASCII-lowercase equivalent.
				78	std::string ToLowerASCII(StringPiece str);
				79	// Converts the given string to its ASCII-uppercase equivalent.
				80	std::string ToUpperASCII(StringPiece str);
				81
				82	// Functor for case-insensitive ASCII comparisons for STL algorithms like
				83	// std::search.
				84	//
				85	// Note that a full Unicode version of this functor is not possible to write
				86	// because case mappings might change the number of characters, depend on
				87	// context (combining accents), and require handling UTF-16. If you need
				88	// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
				89	// use a normal operator== on the result.
				90	template<typename Char> struct CaseInsensitiveCompareASCII {
				91	public:
				92	bool operator()(Char x, Char y) const {
				93	return ToLowerASCII(x) == ToLowerASCII(y);
				94	}
				95	};
				96
				97	// Like strcasecmp for case-insensitive ASCII characters only. Returns:
				98	// -1 (a < b)
				99	// 0 (a == b)
				100	// 1 (a > b)
				101	// (unlike strcasecmp which can return values greater or less than 1/-1). For
				102	// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
				103	// and then just call the normal string operators on the result.
				104	int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
				105
				106	// Equality for ASCII case-insensitive comparisons. For full Unicode support,
				107	// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
				108	// == or !=.
				109	bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
				110
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	111	// Contains the set of characters representing whitespace in the corresponding
				112	// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
				113	// by HTML5, and don't include control characters.
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	114	extern const char kWhitespaceASCII[];
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	115
				116	// Replaces characters in |replace_chars| from anywhere in |input| with
				117	// |replace_with|. Each character in |replace_chars| will be replaced with
				118	// the |replace_with| string. Returns true if any characters were replaced.
				119	// |replace_chars| must be null-terminated.
				120	// NOTE: Safe to use the same variable for both |input| and |output|.
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	121	bool ReplaceChars(const std::string& input,
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	122	const StringPiece& replace_chars,
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	123	const std::string& replace_with,
				124	std::string* output);
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	125
				126	enum TrimPositions {
				127	TRIM_NONE = 0,
				128	TRIM_LEADING = 1 << 0,
				129	TRIM_TRAILING = 1 << 1,
				130	TRIM_ALL = TRIM_LEADING \| TRIM_TRAILING,
				131	};
				132
				133	// Removes characters in |trim_chars| from the beginning and end of |input|.
				134	// The 8-bit version only works on 8-bit characters, not UTF-8.
				135	//
				136	// It is safe to use the same variable for both |input| and |output| (this is
				137	// the normal usage to trim in-place).
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	138	bool TrimString(const std::string& input,
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	139	StringPiece trim_chars,
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	140	std::string* output);
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	141
				142	// StringPiece versions of the above. The returned pieces refer to the original
				143	// buffer.
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	144	StringPiece TrimString(StringPiece input,
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	145	const StringPiece& trim_chars,
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	146	TrimPositions positions);
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	147
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	148	// Trims any whitespace from either end of the input string.
				149	//
				150	// The StringPiece versions return a substring referencing the input buffer.
				151	// The ASCII versions look only for ASCII whitespace.
				152	//
				153	// The std::string versions return where whitespace was found.
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	154	// NOTE: Safe to use the same variable for both input and output.
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	155	TrimPositions TrimWhitespaceASCII(const std::string& input,
				156	TrimPositions positions,
				157	std::string* output);
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	158
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	159	// Returns true if the specified string matches the criteria. How can a wide
				160	// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
				161	// first case) or characters that use only 8-bits and whose 8-bit
				162	// representation looks like a UTF-8 string (the second case).
				163	//
				164	// Note that IsStringUTF8 checks not only if the input is structurally
				165	// valid but also if it doesn't contain any non-character codepoint
				166	// (e.g. U+FFFE). It's done on purpose because all the existing callers want
				167	// to have the maximum 'discriminating' power from other encodings. If
				168	// there's a use case for just checking the structural validity, we have to
				169	// add a new function for that.
				170	//
				171	// IsStringASCII assumes the input is likely all ASCII, and does not leave early
				172	// if it is not the case.
Vitaly Buka	60b8f00	2015-08-20 13:47:48 -0700	[diff] [blame]	173	bool IsStringUTF8(const StringPiece& str);
				174	bool IsStringASCII(const StringPiece& str);
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	175
Alex Vakulenko	bf79a9e	2016-03-28 15:11:43 -0700	[diff] [blame]	176	// Reserves enough memory in |str| to accommodate |length_with_null| characters,
				177	// sets the size of |str| to |length_with_null - 1| characters, and returns a
				178	// pointer to the underlying contiguous array of characters. This is typically
				179	// used when calling a function that writes results into a character array, but
				180	// the caller wants the data to be managed by a string-like object. It is
				181	// convenient in that it can be used inline in the call, and fast in that it
				182	// avoids copying the results of the call from a char* into a string.
				183	//
				184	// |length_with_null| must be at least 2, since otherwise the underlying string
				185	// would have size 0, and trying to access &((*str)[0]) in that case can result
				186	// in a number of problems.
				187	//
				188	// Internally, this takes linear time because the resize() call 0-fills the
				189	// underlying array for potentially all
				190	// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
				191	// could avoid this aspect of the resize() call, as we expect the caller to
				192	// immediately write over this memory, but there is no other way to set the size
				193	// of the string, and not doing that will mean people who access |str| rather
				194	// than str.c_str() will get back a string of whatever size |str| had on entry
				195	// to this function (probably 0).
				196	char* WriteInto(std::string* str, size_t length_with_null);
				197
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	198	} // namespace base
				199
				200	#if defined(OS_WIN)
				201	#include "base/strings/string_util_win.h"
				202	#elif defined(OS_POSIX)
				203	#include "base/strings/string_util_posix.h"
				204	#else
				205	#error Define string operations appropriately for your platform
				206	#endif
				207
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	208	#endif // BASE_STRINGS_STRING_UTIL_H_