diff options
Diffstat (limited to 'icing/tokenization/icu/icu-language-segmenter_test.cc')
-rw-r--r-- | icing/tokenization/icu/icu-language-segmenter_test.cc | 13 |
1 file changed, 10 insertions, 3 deletions
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc index 3bacbc6..a7f7419 100644 --- a/icing/tokenization/icu/icu-language-segmenter_test.cc +++ b/icing/tokenization/icu/icu-language-segmenter_test.cc @@ -296,12 +296,19 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, WordConnector) { // 2. '@' became a word connector // 3. <numeric><word-connector><numeric> such as "3'14" is now considered as // a single token. - if (IsIcu72PlusTokenization()) { + if (GetIcuTokenizationVersion() >= 72) { EXPECT_THAT( language_segmenter->GetAllTerms("com:google:android"), IsOkAndHolds(ElementsAre("com", ":", "google", ":", "android"))); - EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"), - IsOkAndHolds(ElementsAre("com@google@android"))); + // In ICU 74, the rules for '@' were reverted. + if (GetIcuTokenizationVersion() >= 74) { + EXPECT_THAT( + language_segmenter->GetAllTerms("com@google@android"), + IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android"))); + } else { + EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"), + IsOkAndHolds(ElementsAre("com@google@android"))); + } EXPECT_THAT(language_segmenter->GetAllTerms("3'14"), IsOkAndHolds(ElementsAre("3'14"))); } else { |