Blame - third_party/chromium/base/strings/string_util_unittest.cc - weave/libweave

blob: 451fbd87459e3916d8ecb3c30ea47fd63a9c6835 [file] [log] [blame]

Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	1	// Copyright 2013 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/strings/string_util.h"
				6
				7	#include <math.h>
				8	#include <stdarg.h>
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	9	#include <stddef.h>
				10	#include <stdint.h>
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	11
				12	#include <algorithm>
				13
Vitaly Buka	8750b27	2015-08-18 18:39:08 -0700	[diff] [blame]	14	#include <gmock/gmock.h>
				15	#include <gtest/gtest.h>
				16
Alex Vakulenko	674f0eb	2016-01-20 08:10:48 -0800	[diff] [blame]	17	#include "base/macros.h"
Vitaly Buka	8750b27	2015-08-18 18:39:08 -0700	[diff] [blame]	18	#include "base/strings/utf_string_conversion_utils.h"
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	19
				20	using ::testing::ElementsAre;
				21
				22	namespace base {
				23
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	24	TEST(StringUtilTest, IsStringUTF8) {
				25	EXPECT_TRUE(IsStringUTF8("abc"));
				26	EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
				27	EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
				28	EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
				29	EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
				30	EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
				31
				32	// surrogate code points
				33	EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
				34	EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
				35	EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
				36
				37	// overlong sequences
				38	EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
				39	EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
				40	EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
				41	EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
				42	EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
				43	EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
				44	EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
				45	EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
				46	EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
				47	EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
				48	EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
				49
				50	// Beyond U+10FFFF (the upper limit of Unicode codespace)
				51	EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
				52	EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
				53	EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
				54
				55	// BOMs in UTF-16(BE\|LE) and UTF-32(BE\|LE)
				56	EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
				57	EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
				58	EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
				59	EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
				60
				61	// Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
				62	EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
				63	EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
				64	EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
				65	EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
				66	EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
				67	// Strings in legacy encodings. We can certainly make up strings
				68	// in a legacy encoding that are valid in UTF-8, but in real data,
				69	// most of them are invalid as UTF-8.
				70	EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
				71	EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
				72	EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
				73	// "abc" with U+201[CD] in windows-125[0-8]
				74	EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
				75	// U+0639 U+064E U+0644 U+064E in ISO-8859-6
				76	EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
				77	// U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
				78	EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
				79
				80	// Check that we support Embedded Nulls. The first uses the canonical UTF-8
				81	// representation, and the second uses a 2-byte sequence. The second version
				82	// is invalid UTF-8 since UTF-8 states that the shortest encoding for a
				83	// given codepoint must be used.
				84	static const char kEmbeddedNull[] = "embedded\0null";
				85	EXPECT_TRUE(IsStringUTF8(
				86	std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
				87	EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
				88	}
				89
				90	TEST(StringUtilTest, IsStringASCII) {
				91	static char char_ascii[] =
				92	"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	93	static std::wstring wchar_ascii(
				94	L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
				95
				96	// Test a variety of the fragment start positions and lengths in order to make
				97	// sure that bit masking in IsStringASCII works correctly.
				98	// Also, test that a non-ASCII character will be detected regardless of its
				99	// position inside the string.
				100	{
				101	const size_t string_length = arraysize(char_ascii) - 1;
				102	for (size_t offset = 0; offset < 8; ++offset) {
				103	for (size_t len = 0, max_len = string_length - offset; len < max_len;
				104	++len) {
				105	EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
				106	for (size_t char_pos = offset; char_pos < len; ++char_pos) {
				107	char_ascii[char_pos] \|= '\x80';
				108	EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
				109	char_ascii[char_pos] &= ~'\x80';
				110	}
				111	}
				112	}
				113	}
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	114	}
				115
				116	TEST(StringUtilTest, ReplaceChars) {
				117	struct TestData {
				118	const char* input;
				119	const char* replace_chars;
				120	const char* replace_with;
				121	const char* output;
				122	bool result;
				123	} cases[] = {
				124	{ "", "", "", "", false },
				125	{ "test", "", "", "test", false },
				126	{ "test", "", "!", "test", false },
				127	{ "test", "z", "!", "test", false },
				128	{ "test", "e", "!", "t!st", true },
				129	{ "test", "e", "!?", "t!?st", true },
				130	{ "test", "ez", "!", "t!st", true },
				131	{ "test", "zed", "!?", "t!?st", true },
				132	{ "test", "t", "!?", "!?es!?", true },
				133	{ "test", "et", "!>", "!>!>s!>", true },
				134	{ "test", "zest", "!", "!!!!", true },
				135	{ "test", "szt", "!", "!e!!", true },
				136	{ "test", "t", "test", "testestest", true },
				137	};
				138
				139	for (size_t i = 0; i < arraysize(cases); ++i) {
				140	std::string output;
				141	bool result = ReplaceChars(cases[i].input,
				142	cases[i].replace_chars,
				143	cases[i].replace_with,
				144	&output);
				145	EXPECT_EQ(cases[i].result, result);
				146	EXPECT_EQ(cases[i].output, output);
				147	}
				148	}
				149
Vitaly Buka	cbed206	2015-08-17 12:54:05 -0700	[diff] [blame]	150	} // namespace base