aboutsummaryrefslogtreecommitdiff
path: root/icing/tokenization/icu
diff options
context:
space:
mode:
Diffstat (limited to 'icing/tokenization/icu')
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.cc5
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.h2
-rw-r--r--icing/tokenization/icu/icu-language-segmenter_test.cc182
3 files changed, 126 insertions, 63 deletions
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc
index dc7b0a4..fd790cf 100644
--- a/icing/tokenization/icu/icu-language-segmenter.cc
+++ b/icing/tokenization/icu/icu-language-segmenter.cc
@@ -325,14 +325,15 @@ IcuLanguageSegmenter::IcuLanguageSegmenter(std::string locale)
: locale_(std::move(locale)) {}
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
-IcuLanguageSegmenter::Segment(const std::string_view text) const {
+IcuLanguageSegmenter::Segment(const std::string_view text,
+ LanguageSegmenter::AccessType) const {
return IcuLanguageSegmenterIterator::Create(text, locale_);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- Segment(text));
+ IcuLanguageSegmenterIterator::Create(text, locale_));
std::vector<std::string_view> terms;
while (iterator->Advance()) {
terms.push_back(iterator->GetTerm());
diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h
index 4115461..f9cfbcb 100644
--- a/icing/tokenization/icu/icu-language-segmenter.h
+++ b/icing/tokenization/icu/icu-language-segmenter.h
@@ -55,7 +55,7 @@ class IcuLanguageSegmenter : public LanguageSegmenter {
// An iterator of terms on success
// INTERNAL_ERROR if any error occurs
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
- Segment(std::string_view text) const override;
+ Segment(std::string_view text, LanguageSegmenter::AccessType) const override;
// The segmentation depends on the language detected in the input text.
//
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index 6771050..c88b992 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -419,8 +419,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) {
// iterator is done.
text_with_spaces = absl_ports::StrCat(std::string(kNumSeparators, ' '),
"Hello", " ", "World");
- ICING_ASSERT_OK_AND_ASSIGN(auto itr,
- language_segmenter->Segment(text_with_spaces));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto itr,
+ language_segmenter->Segment(
+ text_with_spaces, LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> terms;
while (itr->Advance()) {
terms.push_back(itr->GetTerm());
@@ -516,8 +518,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartUtf32WordConnector) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "com.google.android is package";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "com.google.android is package"
// ^ ^^ ^^
@@ -533,8 +537,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -550,8 +556,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -568,8 +576,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -588,8 +598,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -607,8 +619,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32WordConnector) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "package com.google.android name";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "package com.google.android name"
// ^ ^^ ^^
@@ -630,8 +644,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32OutOfBounds) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -661,13 +677,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText));
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kText));
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -683,13 +701,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai));
+ segmenter->Segment(kThai,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kThai));
+ segmenter->Segment(kThai,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -705,13 +725,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean));
+ segmenter->Segment(kKorean,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kKorean));
+ segmenter->Segment(kKorean,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -731,13 +753,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText));
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(kText));
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -754,13 +778,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai));
+ segmenter->Segment(kThai,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(kThai));
+ segmenter->Segment(kThai,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -777,13 +803,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean));
+ segmenter->Segment(kKorean,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(kKorean));
+ segmenter->Segment(kKorean,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -800,7 +828,9 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment("How are you你好吗お元気ですか"));
+ language_segmenter->Segment(
+ "How are you你好吗お元気ですか",
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -837,8 +867,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kTextWithSpace));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator));
// String: "Hello World"
// ^ ^ ^
@@ -877,8 +909,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) {
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't have whitespaces as word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kChinese));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kChinese, LanguageSegmenter::AccessType::kForwardIterator));
// String: "我每天走路去上班。"
// ^ ^ ^ ^^ ^
// UTF-8 idx: 0 3 9 15 18 24
@@ -904,8 +938,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kJapanese));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kJapanese, LanguageSegmenter::AccessType::kForwardIterator));
// String: "私は毎日仕事に歩いています。"
// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 3 6 12 18212427 33 39
@@ -930,8 +966,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) {
language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kKhmer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kKhmer, LanguageSegmenter::AccessType::kForwardIterator));
// String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
// ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 24 45 69
@@ -957,8 +995,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kThai));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kThai, LanguageSegmenter::AccessType::kForwardIterator));
// String: "ฉันเดินไปทำงานทุกวัน"
// ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 21 27 42 51
@@ -983,8 +1023,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "package name com.google.android!";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "package name com.google.android!"
// ^ ^^ ^^ ^
@@ -1006,8 +1048,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBoundsUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -1037,13 +1081,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText));
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kText));
+ segmenter->Segment(kText,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1061,13 +1107,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai));
+ segmenter->Segment(kThai,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kThai));
+ segmenter->Segment(kThai,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1084,13 +1132,15 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean));
+ segmenter->Segment(kKorean,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kKorean));
+ segmenter->Segment(kKorean,
+ LanguageSegmenter::AccessType::kForwardIterator));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1107,7 +1157,9 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment("How are you你好吗お元気ですか"));
+ language_segmenter->Segment(
+ "How are you你好吗お元気ですか",
+ LanguageSegmenter::AccessType::kForwardIterator));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -1145,8 +1197,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kTextWithSpace));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator));
// String: "Hello World"
// ^ ^ ^
@@ -1184,8 +1238,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBeforeUtf32) {
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't have whitespaces as word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kChinese));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kChinese, LanguageSegmenter::AccessType::kForwardIterator));
// String: "我每天走路去上班。"
// ^ ^ ^ ^^
// UTF-8 idx: 0 3 9 15 18
@@ -1208,8 +1264,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kJapanese));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kJapanese, LanguageSegmenter::AccessType::kForwardIterator));
// String: "私は毎日仕事に歩いています。"
// ^ ^ ^ ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 3 6 12 18212427 33
@@ -1231,8 +1289,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBeforeUtf32) {
language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kKhmer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kKhmer, LanguageSegmenter::AccessType::kForwardIterator));
// String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
// ^ ^ ^ ^
// UTF-8 idx: 0 9 24 45
@@ -1255,8 +1315,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(kThai));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(
+ kThai, LanguageSegmenter::AccessType::kForwardIterator));
// String: "ฉันเดินไปทำงานทุกวัน"
// ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 21 27 42 51