Blame - third_party/chromium/base/strings/utf_string_conversion_utils.cc - weave/libweave

blob: 05bd1225720b6e46640635bfa8723ed78a621e13 [file] [log] [blame]

Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	1	// Copyright (c) 2009 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/strings/utf_string_conversion_utils.h"
				6
				7	#include "base/third_party/icu/icu_utf.h"
				8
				9	namespace base {
				10
				11	// ReadUnicodeCharacter --------------------------------------------------------
				12
				13	bool ReadUnicodeCharacter(const char* src,
				14	int32 src_len,
				15	int32* char_index,
				16	uint32* code_point_out) {
				17	// U8_NEXT expects to be able to use -1 to signal an error, so we must
				18	// use a signed type for code_point. But this function returns false
				19	// on error anyway, so code_point_out is unsigned.
				20	int32 code_point;
				21	CBU8_NEXT(src, *char_index, src_len, code_point);
				22	*code_point_out = static_cast<uint32>(code_point);
				23
				24	// The ICU macro above moves to the next char, we want to point to the last
				25	// char consumed.
				26	(*char_index)--;
				27
				28	// Validate the decoded value.
				29	return IsValidCodepoint(code_point);
				30	}
				31
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	32	// WriteUnicodeCharacter -------------------------------------------------------
				33
				34	size_t WriteUnicodeCharacter(uint32 code_point, std::string* output) {
				35	if (code_point <= 0x7f) {
				36	// Fast path the common case of one byte.
				37	output->push_back(static_cast<char>(code_point));
				38	return 1;
				39	}
				40
				41
				42	// CBU8_APPEND_UNSAFE can append up to 4 bytes.
				43	size_t char_offset = output->length();
				44	size_t original_char_offset = char_offset;
				45	output->resize(char_offset + CBU8_MAX_LENGTH);
				46
				47	CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
				48
				49	// CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so
				50	// it will represent the new length of the string.
				51	output->resize(char_offset);
				52	return char_offset - original_char_offset;
				53	}
				54
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	55	// Generalized Unicode converter -----------------------------------------------
				56
				// Empties |output| and pre-reserves space in it for a UTF-8 conversion of the
				// |src_len| characters at |src|.
				//
				// The estimate is a guess based only on the first input character: if it is
				// below 0x80 the whole input is assumed to be ASCII (one byte per character),
				// otherwise every character is assumed to need three bytes. Nothing is
				// reserved for empty input.
				template <typename CHAR>
				void PrepareForUTF8Output(const CHAR* src,
				                          size_t src_len,
				                          std::string* output) {
				  output->clear();
				  if (src_len != 0) {
				    const bool first_is_ascii = src[0] < 0x80;
				    const size_t bytes_per_char = first_is_ascii ? 1 : 3;
				    output->reserve(src_len * bytes_per_char);
				  }
				}
				72
// Empties |output| and pre-reserves space in it for a UTF-16 or UTF-32
// conversion of the |src_len| bytes of UTF-8 at |src|.
//
// The estimate is a guess based only on the first input byte: if it is below
// 0x80 the input is assumed to be all ASCII (one output unit per byte),
// otherwise each character is assumed to take two input bytes. Nothing is
// reserved for empty input.
template <typename STRING>
void PrepareForUTF16Or32Output(const char* src,
                               size_t src_len,
                               STRING* output) {
  output->clear();
  if (src_len != 0) {
    // Compare as unsigned so that lead bytes >= 0x80 are not seen as negative.
    const bool first_is_ascii = static_cast<unsigned char>(src[0]) < 0x80;
    output->reserve(first_is_ascii ? src_len : src_len / 2);
  }
}
				89
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	90	} // namespace base