aboutsummaryrefslogtreecommitdiff
path: root/icing/tokenization/icu/icu-language-segmenter_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/tokenization/icu/icu-language-segmenter_test.cc')
-rw-r--r-- icing/tokenization/icu/icu-language-segmenter_test.cc 13
1 file changed, 10 insertions, 3 deletions
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index 3bacbc6..a7f7419 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -296,12 +296,19 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) {
// 2. '@' became a word connector
// 3. <numeric><word-connector><numeric> such as "3'14" is now considered as
// a single token.
- if (IsIcu72PlusTokenization()) {
+ if (GetIcuTokenizationVersion() >= 72) {
EXPECT_THAT(
language_segmenter->GetAllTerms("com:google:android"),
IsOkAndHolds(ElementsAre("com", ":", "google", ":", "android")));
- EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
- IsOkAndHolds(ElementsAre("com@google@android")));
+ // In ICU 74, the rules for '@' were reverted.
+ if (GetIcuTokenizationVersion() >= 74) {
+ EXPECT_THAT(
+ language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android")));
+ } else {
+ EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
+ IsOkAndHolds(ElementsAre("com@google@android")));
+ }
EXPECT_THAT(language_segmenter->GetAllTerms("3'14"),
IsOkAndHolds(ElementsAre("3'14")));
} else {