aboutsummaryrefslogtreecommitdiff
path: root/icing/query/suggestion-processor_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/query/suggestion-processor_test.cc')
-rw-r--r--icing/query/suggestion-processor_test.cc324
1 files changed, 324 insertions, 0 deletions
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
new file mode 100644
index 0000000..5e62277
--- /dev/null
+++ b/icing/query/suggestion-processor_test.cc
@@ -0,0 +1,324 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/suggestion-processor.h"
+
+#include "gmock/gmock.h"
+#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/transform/normalizer-factory.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::Test;
+
+class SuggestionProcessorTest : public Test {
+ protected:
+ SuggestionProcessorTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ store_dir_(test_dir_ + "/store"),
+ index_dir_(test_dir_ + "/index") {}
+
+ void SetUp() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_US, jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(segmenter_options));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
+ /*max_term_byte_size=*/1000));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+ schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
+ }
+
+ libtextclassifier3::Status AddTokenToIndex(
+ DocumentId document_id, SectionId section_id,
+ TermMatchType::Code term_match_type, const std::string& token) {
+ Index::Editor editor = index_->Edit(document_id, section_id,
+ term_match_type, /*namespace_id=*/0);
+ auto status = editor.BufferTerm(token.c_str());
+ return status.ok() ? editor.IndexAllBufferedTerms() : status;
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ const std::string test_dir_;
+ const std::string store_dir_;
+ std::unique_ptr<Index> index_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
+ FakeClock fake_clock_;
+
+ private:
+ IcingFilesystem icing_filesystem_;
+ const std::string index_dir_;
+};
+
+constexpr DocumentId kDocumentId0 = 0;
+constexpr SectionId kSectionId2 = 2;
+
+TEST_F(SuggestionProcessorTest, PrependedPrefixTokenTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix(
+ "prefix token should be prepended to the suggestion f");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+ EXPECT_THAT(terms.at(0).content,
+ "prefix token should be prepended to the suggestion foo");
+}
+
+TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("nonExistTerm");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f ");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("F");
+ suggestion_spec.set_num_to_return(10);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(suggestion_spec,
+ /*namespace_ids=*/{}));
+ EXPECT_THAT(terms.at(0).content, "foo");
+
+ suggestion_spec.set_prefix("fO");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor->QuerySuggestions(suggestion_spec,
+ /*namespace_ids=*/{}));
+ EXPECT_THAT(terms.at(0).content, "foo");
+
+ suggestion_spec.set_prefix("Fo");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor->QuerySuggestions(suggestion_spec,
+ /*namespace_ids=*/{}));
+ EXPECT_THAT(terms.at(0).content, "foo");
+
+ suggestion_spec.set_prefix("FO");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor->QuerySuggestions(suggestion_spec,
+ /*namespace_ids=*/{}));
+ EXPECT_THAT(terms.at(0).content, "foo");
+}
+
+TEST_F(SuggestionProcessorTest, OrOperatorPrefixTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "original"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f OR");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+
+ // Last Operator token will be used to query suggestion
+ EXPECT_THAT(terms.at(0).content, "f original");
+}
+
+TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("{f}");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("[f]");
+ ICING_ASSERT_OK_AND_ASSIGN(terms, suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("(f)");
+ ICING_ASSERT_OK_AND_ASSIGN(terms, suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "foo"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f:");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+ EXPECT_THAT(terms, IsEmpty());
+
+ suggestion_spec.set_prefix("f-");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ terms, suggestion_processor->QuerySuggestions(suggestion_spec,
+ /*namespace_ids=*/{}));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
+ ASSERT_THAT(AddTokenToIndex(kDocumentId0, kSectionId2,
+ TermMatchType::EXACT_ONLY, "original"),
+ IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SuggestionProcessor> suggestion_processor,
+ SuggestionProcessor::Create(index_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("OR OR - :");
+ suggestion_spec.set_num_to_return(10);
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms,
+ suggestion_processor->QuerySuggestions(
+ suggestion_spec, /*namespace_ids=*/{}));
+ EXPECT_THAT(terms, IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing