diff options
Diffstat (limited to 'base/i18n/case_conversion_unittest.cc')
-rw-r--r-- | base/i18n/case_conversion_unittest.cc | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/base/i18n/case_conversion_unittest.cc b/base/i18n/case_conversion_unittest.cc new file mode 100644 index 0000000000..ee795bc6e3 --- /dev/null +++ b/base/i18n/case_conversion_unittest.cc @@ -0,0 +1,119 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/case_conversion.h" +#include "base/i18n/rtl.h" +#include "base/strings/utf_string_conversions.h" +#include "base/test/icu_test_util.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/icu/source/i18n/unicode/usearch.h" + +namespace base { +namespace i18n { + +namespace { + +const wchar_t kNonASCIIMixed[] = + L"\xC4\xD6\xE4\xF6\x20\xCF\xEF\x20\xF7\x25" + L"\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07\x1F0F" + L"\x20\x1E00\x1E01"; +const wchar_t kNonASCIILower[] = + L"\xE4\xF6\xE4\xF6\x20\xEF\xEF" + L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07" + L"\x1F07\x20\x1E01\x1E01"; +const wchar_t kNonASCIIUpper[] = + L"\xC4\xD6\xC4\xD6\x20\xCF\xCF" + L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F0F" + L"\x1F0F\x20\x1E00\x1E00"; + +} // namespace + +// Test upper and lower case string conversion. +TEST(CaseConversionTest, UpperLower) { + const string16 mixed(ASCIIToUTF16("Text with UPPer & lowER casE.")); + const string16 expected_lower(ASCIIToUTF16("text with upper & lower case.")); + const string16 expected_upper(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE.")); + + string16 result = ToLower(mixed); + EXPECT_EQ(expected_lower, result); + + result = ToUpper(mixed); + EXPECT_EQ(expected_upper, result); +} + +TEST(CaseConversionTest, NonASCII) { + const string16 mixed(WideToUTF16(kNonASCIIMixed)); + const string16 expected_lower(WideToUTF16(kNonASCIILower)); + const string16 expected_upper(WideToUTF16(kNonASCIIUpper)); + + string16 result = ToLower(mixed); + EXPECT_EQ(expected_lower, result); + + result = ToUpper(mixed); + EXPECT_EQ(expected_upper, result); +} + +TEST(CaseConversionTest, TurkishLocaleConversion) { + const string16 mixed(WideToUTF16(L"\x49\x131")); + const string16 expected_lower(WideToUTF16(L"\x69\x131")); + const string16 expected_upper(WideToUTF16(L"\x49\x49")); + + test::ScopedRestoreICUDefaultLocale restore_locale; + i18n::SetICUDefaultLocale("en_US"); + + string16 result = ToLower(mixed); + EXPECT_EQ(expected_lower, result); + + result = ToUpper(mixed); + EXPECT_EQ(expected_upper, result); + + i18n::SetICUDefaultLocale("tr"); + + const string16 expected_lower_turkish(WideToUTF16(L"\x131\x131")); + const string16 expected_upper_turkish(WideToUTF16(L"\x49\x49")); + + result = ToLower(mixed); + EXPECT_EQ(expected_lower_turkish, result); + + result = ToUpper(mixed); + EXPECT_EQ(expected_upper_turkish, result); +} + +TEST(CaseConversionTest, FoldCase) { + // Simple ASCII, should lower-case. + EXPECT_EQ(ASCIIToUTF16("hello, world"), + FoldCase(ASCIIToUTF16("Hello, World"))); + + // Non-ASCII cases from above. They should all fold to the same result. + EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed)), + FoldCase(WideToUTF16(kNonASCIILower))); + EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed)), + FoldCase(WideToUTF16(kNonASCIIUpper))); + + // Turkish cases from above. This is the lower-case expected result from the + // US locale. It should be the same even when the current locale is Turkish. + const string16 turkish(WideToUTF16(L"\x49\x131")); + const string16 turkish_expected(WideToUTF16(L"\x69\x131")); + + test::ScopedRestoreICUDefaultLocale restore_locale; + i18n::SetICUDefaultLocale("en_US"); + EXPECT_EQ(turkish_expected, FoldCase(turkish)); + + i18n::SetICUDefaultLocale("tr"); + EXPECT_EQ(turkish_expected, FoldCase(turkish)); + + // Test a case that gets bigger when processed. + // U+130 = LATIN CAPITAL LETTER I WITH DOT ABOVE gets folded to a lower case + // "i" followed by U+307 COMBINING DOT ABOVE. + EXPECT_EQ(WideToUTF16(L"i\u0307j"), FoldCase(WideToUTF16(L"\u0130j"))); + + // U+00DF (SHARP S) and U+1E9E (CAPIRAL SHARP S) are both folded to "ss". + EXPECT_EQ(ASCIIToUTF16("ssss"), FoldCase(WideToUTF16(L"\u00DF\u1E9E"))); +} + +} // namespace i18n +} // namespace base + + + |