diff options
Diffstat (limited to 'icing/tokenization/icu')
-rw-r--r-- | icing/tokenization/icu/icu-language-segmenter.cc | 5 | ||||
-rw-r--r-- | icing/tokenization/icu/icu-language-segmenter.h | 2 | ||||
-rw-r--r-- | icing/tokenization/icu/icu-language-segmenter_test.cc | 182 |
3 files changed, 126 insertions, 63 deletions
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc index dc7b0a4..fd790cf 100644 --- a/icing/tokenization/icu/icu-language-segmenter.cc +++ b/icing/tokenization/icu/icu-language-segmenter.cc @@ -325,14 +325,15 @@ IcuLanguageSegmenter::IcuLanguageSegmenter(std::string locale) : locale_(std::move(locale)) {} libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> -IcuLanguageSegmenter::Segment(const std::string_view text) const { +IcuLanguageSegmenter::Segment(const std::string_view text, + LanguageSegmenter::AccessType) const { return IcuLanguageSegmenterIterator::Create(text, locale_); } libtextclassifier3::StatusOr<std::vector<std::string_view>> IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const { ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator, - Segment(text)); + IcuLanguageSegmenterIterator::Create(text, locale_)); std::vector<std::string_view> terms; while (iterator->Advance()) { terms.push_back(iterator->GetTerm()); diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h index 4115461..f9cfbcb 100644 --- a/icing/tokenization/icu/icu-language-segmenter.h +++ b/icing/tokenization/icu/icu-language-segmenter.h @@ -55,7 +55,7 @@ class IcuLanguageSegmenter : public LanguageSegmenter { // An iterator of terms on success // INTERNAL_ERROR if any error occurs libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> - Segment(std::string_view text) const override; + Segment(std::string_view text, LanguageSegmenter::AccessType) const override; // The segmentation depends on the language detected in the input text. // diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc index 6771050..c88b992 100644 --- a/icing/tokenization/icu/icu-language-segmenter_test.cc +++ b/icing/tokenization/icu/icu-language-segmenter_test.cc @@ -419,8 +419,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) { // iterator is done. text_with_spaces = absl_ports::StrCat(std::string(kNumSeparators, ' '), "Hello", " ", "World"); - ICING_ASSERT_OK_AND_ASSIGN(auto itr, - language_segmenter->Segment(text_with_spaces)); + ICING_ASSERT_OK_AND_ASSIGN( + auto itr, + language_segmenter->Segment( + text_with_spaces, LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> terms; while (itr->Advance()) { terms.push_back(itr->GetTerm()); @@ -516,8 +518,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartUtf32WordConnector) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "com.google.android is package"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "com.google.android is package" // ^ ^^ ^^ @@ -533,8 +537,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -550,8 +556,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -568,8 +576,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -588,8 +598,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -607,8 +619,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32WordConnector) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package com.google.android name"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "package com.google.android name" // ^ ^^ ^^ @@ -630,8 +644,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32OutOfBounds) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -661,13 +677,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText)); + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kText)); + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -683,13 +701,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai)); + segmenter->Segment(kThai, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kThai)); + segmenter->Segment(kThai, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -705,13 +725,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean)); + segmenter->Segment(kKorean, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kKorean)); + segmenter->Segment(kKorean, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -731,13 +753,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText)); + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment(kText)); + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -754,13 +778,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai)); + segmenter->Segment(kThai, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment(kThai)); + segmenter->Segment(kThai, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -777,13 +803,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean)); + segmenter->Segment(kKorean, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment(kKorean)); + segmenter->Segment(kKorean, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -800,7 +828,9 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment("How are you你好吗お元気ですか")); + language_segmenter->Segment( + "How are you你好吗お元気ですか", + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -837,8 +867,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. constexpr std::string_view kTextWithSpace = "Hello World"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kTextWithSpace)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator)); // String: "Hello World" // ^ ^ ^ @@ -877,8 +909,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) { // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kChinese)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kChinese, LanguageSegmenter::AccessType::kForwardIterator)); // String: "我每天走路去上班。" // ^ ^ ^ ^^ ^ // UTF-8 idx: 0 3 9 15 18 24 @@ -904,8 +938,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kJapanese)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kJapanese, LanguageSegmenter::AccessType::kForwardIterator)); // String: "私は毎日仕事に歩いています。" // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 3 6 12 18212427 33 39 @@ -930,8 +966,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) { language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kKhmer)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kKhmer, LanguageSegmenter::AccessType::kForwardIterator)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" // ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 24 45 69 @@ -957,8 +995,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kThai)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kThai, LanguageSegmenter::AccessType::kForwardIterator)); // String: "ฉันเดินไปทำงานทุกวัน" // ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 21 27 42 51 @@ -983,8 +1023,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package name com.google.android!"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "package name com.google.android!" // ^ ^^ ^^ ^ @@ -1006,8 +1048,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBoundsUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -1037,13 +1081,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText)); + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kText)); + segmenter->Segment(kText, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1061,13 +1107,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai)); + segmenter->Segment(kThai, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kThai)); + segmenter->Segment(kThai, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1084,13 +1132,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean)); + segmenter->Segment(kKorean, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kKorean)); + segmenter->Segment(kKorean, + LanguageSegmenter::AccessType::kForwardIterator)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1107,7 +1157,9 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment("How are you你好吗お元気ですか")); + language_segmenter->Segment( + "How are you你好吗お元気ですか", + LanguageSegmenter::AccessType::kForwardIterator)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -1145,8 +1197,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. constexpr std::string_view kTextWithSpace = "Hello World"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kTextWithSpace)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator)); // String: "Hello World" // ^ ^ ^ @@ -1184,8 +1238,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBeforeUtf32) { // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kChinese)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kChinese, LanguageSegmenter::AccessType::kForwardIterator)); // String: "我每天走路去上班。" // ^ ^ ^ ^^ // UTF-8 idx: 0 3 9 15 18 @@ -1208,8 +1264,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kJapanese)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kJapanese, LanguageSegmenter::AccessType::kForwardIterator)); // String: "私は毎日仕事に歩いています。" // ^ ^ ^ ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 3 6 12 18212427 33 @@ -1231,8 +1289,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBeforeUtf32) { language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kKhmer)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kKhmer, LanguageSegmenter::AccessType::kForwardIterator)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" // ^ ^ ^ ^ // UTF-8 idx: 0 9 24 45 @@ -1255,8 +1315,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment(kThai)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment( + kThai, LanguageSegmenter::AccessType::kForwardIterator)); // String: "ฉันเดินไปทำงานทุกวัน" // ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 21 27 42 51 |