aboutsummaryrefslogtreecommitdiff
path: root/icing
diff options
context:
space:
mode:
author Alex Saveliev <alexsav@google.com> 2023-02-07 20:02:36 -0800
committer Alex Saveliev <alexsav@google.com> 2023-02-07 20:25:19 -0800
commit 5a41ca990be33387b0d5d15836a465bbe5ff5a28 (patch)
tree 10e0e3d043aa1ba8effa3964ccf3287d83961cb0 /icing
parent cccafab8dfcae94d7072eb49ea971e3c688bdfc4 (diff)
download icing-5a41ca990be33387b0d5d15836a465bbe5ff5a28.tar.gz
Update icing from upstream
====================================================================== Adds a proto change for the delete propagation option ====================================================================== [ez] Change version to magic for PersistentHashMap ====================================================================== [iOS][testing][nitro] Disabling ICU language segmenter_test. ====================================================================== 1. Add support for segmentation in the QueryVisitor. ====================================================================== Support the new double list type in ScoreExpression ====================================================================== Pass JoinChildrenFetcher from IcingSearchEngine all the way down to ScoringVisitor ====================================================================== Refactor the logic of Icing Joins so that nested search and scoring will be performed before the parent ====================================================================== Add lite-index thread-safety tests. ====================================================================== Split IcingSearchEngineTest into separate tests to cover specific apis: ====================================================================== Fix index tests TearDown method. ====================================================================== Improve query concurrency by providing a finer-grained lock around the LiteIndex. 
====================================================================== Fix Icing normalization bug ====================================================================== [ez] Fix integer overflow error for IntegerIndexStorage ====================================================================== [NumericSearch][Storage][11/x] Implement Reset and destructor for IntegerIndex ====================================================================== [NumericSearch][Storage][10/x] Add class IntegerIndex ====================================================================== Refactor NumericIndex based on PersistentStorage ====================================================================== Refactor IntegerIndexStorage based on PersistentStorage ====================================================================== Add "working_path" into PersistentStorage ====================================================================== Refactor PersistentHashMap based on PersistentStorage ====================================================================== Create virtual class PersistentStorage for refactoring ====================================================================== Avoids returning reference to local temporary object. ====================================================================== LSC: Clean up references to the legacy protobuf compat library ====================================================================== Fix time complexity regression for snippet retriever Bug: 256022027 Bug: 193919210 Bug: 266132035 Bug: 208654892 Bug: 261474063 Bug: 266103594 Bug: 146008613 Bug: 253182853 Bug: 266204868 Bug: 249829533 Bug: 266665956 Bug: 265258364 Change-Id: Ib2398c5097b6a2a57900e2ad4e3737502aa13820
Diffstat (limited to 'icing')
-rw-r--r-- icing/file/file-backed-proto-log.h 6
-rw-r--r-- icing/file/file-backed-proto.h 4
-rw-r--r-- icing/file/filesystem.cc 3
-rw-r--r-- icing/file/persistent-hash-map.cc 458
-rw-r--r-- icing/file/persistent-hash-map.h 174
-rw-r--r-- icing/file/persistent-hash-map_test.cc 353
-rw-r--r-- icing/file/persistent-storage.cc 55
-rw-r--r-- icing/file/persistent-storage.h 338
-rw-r--r-- icing/file/portable-file-backed-proto-log.h 5
-rw-r--r-- icing/icing-search-engine.cc 81
-rw-r--r-- icing/icing-search-engine.h 10
-rw-r--r-- icing/icing-search-engine_delete_test.cc 768
-rw-r--r-- icing/icing-search-engine_initialization_test.cc 1920
-rw-r--r-- icing/icing-search-engine_optimize_test.cc 974
-rw-r--r-- icing/icing-search-engine_put_test.cc 481
-rw-r--r-- icing/icing-search-engine_schema_test.cc 1698
-rw-r--r-- icing/icing-search-engine_search_test.cc 4143
-rw-r--r-- icing/icing-search-engine_suggest_test.cc 1304
-rw-r--r-- icing/icing-search-engine_test.cc 9893
-rw-r--r-- icing/index/index-processor_benchmark.cc 118
-rw-r--r-- icing/index/index-processor_test.cc 35
-rw-r--r-- icing/index/index_test.cc 1
-rw-r--r-- icing/index/lite/doc-hit-info-iterator-term-lite.cc 4
-rw-r--r-- icing/index/lite/lite-index.cc 59
-rw-r--r-- icing/index/lite/lite-index.h 144
-rw-r--r-- icing/index/lite/lite-index_test.cc 6
-rw-r--r-- icing/index/lite/lite-index_thread-safety_test.cc 400
-rw-r--r-- icing/index/main/main-index_test.cc 2
-rw-r--r-- icing/index/numeric/dummy-numeric-index.h 69
-rw-r--r-- icing/index/numeric/integer-index-storage.cc 292
-rw-r--r-- icing/index/numeric/integer-index-storage.h 195
-rw-r--r-- icing/index/numeric/integer-index-storage_test.cc 205
-rw-r--r-- icing/index/numeric/integer-index.cc 242
-rw-r--r-- icing/index/numeric/integer-index.h 190
-rw-r--r-- icing/index/numeric/numeric-index.h 40
-rw-r--r-- icing/index/numeric/numeric-index_test.cc 23
-rw-r--r-- icing/index/string-section-indexing-handler.cc 2
-rw-r--r-- icing/jni/icing-search-engine-jni.cc 6
-rw-r--r-- icing/join/join-children-fetcher.cc 39
-rw-r--r-- icing/join/join-children-fetcher.h 73
-rw-r--r-- icing/join/join-children-fetcher_test.cc 82
-rw-r--r-- icing/join/join-processor.cc 47
-rw-r--r-- icing/join/join-processor.h 14
-rw-r--r-- icing/join/join-processor_test.cc 49
-rw-r--r-- icing/portable/equals-proto.h 2
-rw-r--r-- icing/portable/gzip_stream.h 20
-rw-r--r-- icing/query/advanced_query_parser/query-visitor.cc 168
-rw-r--r-- icing/query/advanced_query_parser/query-visitor.h 90
-rw-r--r-- icing/query/advanced_query_parser/query-visitor_test.cc 333
-rw-r--r-- icing/query/query-processor.cc 26
-rw-r--r-- icing/query/query-processor_benchmark.cc 24
-rw-r--r-- icing/query/query-processor_test.cc 36
-rw-r--r-- icing/scoring/advanced_scoring/advanced-scorer.cc 10
-rw-r--r-- icing/scoring/advanced_scoring/advanced-scorer.h 4
-rw-r--r-- icing/scoring/advanced_scoring/score-expression.cc 85
-rw-r--r-- icing/scoring/advanced_scoring/score-expression.h 80
-rw-r--r-- icing/scoring/advanced_scoring/score-expression_test.cc 104
-rw-r--r-- icing/scoring/advanced_scoring/scoring-visitor.h 9
-rw-r--r-- icing/scoring/scorer-factory.cc 5
-rw-r--r-- icing/scoring/scorer-factory.h 4
-rw-r--r-- icing/scoring/scoring-processor.cc 13
-rw-r--r-- icing/scoring/scoring-processor.h 4
-rw-r--r-- icing/store/document-store.cc 11
-rw-r--r-- icing/store/document-store.h 4
-rw-r--r-- icing/store/document-store_test.cc 4
-rw-r--r-- icing/store/key-mapper_benchmark.cc 9
-rw-r--r-- icing/store/key-mapper_test.cc 22
-rw-r--r-- icing/store/persistent-hash-map-key-mapper.h 54
-rw-r--r-- icing/transform/icu/icu-normalizer.cc 111
-rw-r--r-- icing/transform/icu/icu-normalizer.h 26
-rw-r--r-- icing/transform/icu/icu-normalizer_test.cc 13
71 files changed, 14926 insertions, 11350 deletions
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 78236ba..095f832 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -40,7 +40,6 @@
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
@@ -53,6 +52,7 @@
#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -575,8 +575,8 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
return absl_ports::NotFoundError("The proto data has been erased.");
}
- google::protobuf::io::ArrayInputStream proto_stream(
- mmapped_file.mutable_region(), stored_size);
+ google::protobuf::io::ArrayInputStream proto_stream(mmapped_file.mutable_region(),
+ stored_size);
// Deserialize proto
ProtoT proto;
diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h
index d7d9bad..8deb7a6 100644
--- a/icing/file/file-backed-proto.h
+++ b/icing/file/file-backed-proto.h
@@ -66,10 +66,10 @@ class FileBackedProto {
// Reset the internal file_path for the file backed proto.
// Example use:
// auto file_backed_proto1 = *FileBackedProto<Proto>::Create(...);
- // auto file_backed_proto2 = *FileBackedProto<Proto>::Create(...);
+ // auto file_backed_google::protobuf = *FileBackedProto<Proto>::Create(...);
// filesystem.SwapFiles(file1, file2);
// file_backed_proto1.SetSwappedFilepath(file2);
- // file_backed_proto2.SetSwappedFilepath(file1);
+ // file_backed_google::protobuf.SetSwappedFilepath(file1);
void SetSwappedFilepath(std::string_view swapped_to_file_path) {
file_path_ = swapped_to_file_path;
}
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc
index 10b77db..c83a351 100644
--- a/icing/file/filesystem.cc
+++ b/icing/file/filesystem.cc
@@ -127,6 +127,9 @@ bool ListDirectoryInternal(const char* dir_name,
return false;
}
+ // According to linux man page
+ // (https://man7.org/linux/man-pages/man3/readdir.3.html#RETURN_VALUE), dirent
+ // may be statically allocated, so don't free it.
dirent* p;
// readdir's implementation seems to be thread safe.
while ((p = readdir(dir)) != nullptr) {
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 0af5e2f..14a1251 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -58,125 +58,26 @@ libtextclassifier3::StatusOr<int32_t> HashKeyToBucketIndex(
return static_cast<int32_t>(std::hash<std::string_view>()(key) % num_buckets);
}
-// Helper function to PWrite crcs and info to metadata_file_path. Note that
-// metadata_file_path will be the normal or temporary (for branching use when
-// rehashing) metadata file path.
-libtextclassifier3::Status WriteMetadata(const Filesystem& filesystem,
- const char* metadata_file_path,
- const PersistentHashMap::Crcs* crcs,
- const PersistentHashMap::Info* info) {
- ScopedFd sfd(filesystem.OpenForWrite(metadata_file_path));
- if (!sfd.is_valid()) {
- return absl_ports::InternalError("Failed to create metadata file");
- }
-
- // Write crcs and info. File layout: <Crcs><Info>
- if (!filesystem.PWrite(sfd.get(), PersistentHashMap::Crcs::kFileOffset, crcs,
- sizeof(PersistentHashMap::Crcs))) {
- return absl_ports::InternalError("Failed to write crcs into metadata file");
- }
- // Note that PWrite won't change the file offset, so we need to specify
- // the correct offset when writing Info.
- if (!filesystem.PWrite(sfd.get(), PersistentHashMap::Info::kFileOffset, info,
- sizeof(PersistentHashMap::Info))) {
- return absl_ports::InternalError("Failed to write info into metadata file");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-// Helper function to update checksums from info and storages to a Crcs
-// instance. Note that storages will be the normal instances used by
-// PersistentHashMap, or the temporary instances (for branching use when
-// rehashing).
-libtextclassifier3::Status UpdateChecksums(
- PersistentHashMap::Crcs* crcs, PersistentHashMap::Info* info,
- FileBackedVector<PersistentHashMap::Bucket>* bucket_storage,
- FileBackedVector<PersistentHashMap::Entry>* entry_storage,
- FileBackedVector<char>* kv_storage) {
- // Compute crcs
- ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
- bucket_storage->ComputeChecksum());
- ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
- entry_storage->ComputeChecksum());
- ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage->ComputeChecksum());
-
- crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
- crcs->component_crcs.bucket_storage_crc = bucket_storage_crc.Get();
- crcs->component_crcs.entry_storage_crc = entry_storage_crc.Get();
- crcs->component_crcs.kv_storage_crc = kv_storage_crc.Get();
- crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
-
- return libtextclassifier3::Status::OK;
-}
-
-// Helper function to validate checksums.
-libtextclassifier3::Status ValidateChecksums(
- const PersistentHashMap::Crcs* crcs, const PersistentHashMap::Info* info,
- FileBackedVector<PersistentHashMap::Bucket>* bucket_storage,
- FileBackedVector<PersistentHashMap::Entry>* entry_storage,
- FileBackedVector<char>* kv_storage) {
- if (crcs->all_crc != crcs->component_crcs.ComputeChecksum().Get()) {
- return absl_ports::FailedPreconditionError(
- "Invalid all crc for PersistentHashMap");
- }
-
- if (crcs->component_crcs.info_crc != info->ComputeChecksum().Get()) {
- return absl_ports::FailedPreconditionError(
- "Invalid info crc for PersistentHashMap");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
- bucket_storage->ComputeChecksum());
- if (crcs->component_crcs.bucket_storage_crc != bucket_storage_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with PersistentHashMap bucket storage");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
- entry_storage->ComputeChecksum());
- if (crcs->component_crcs.entry_storage_crc != entry_storage_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with PersistentHashMap entry storage");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage->ComputeChecksum());
- if (crcs->component_crcs.kv_storage_crc != kv_storage_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with PersistentHashMap key value storage");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-// Since metadata/bucket/entry storages should be branched when rehashing, we
-// have to store them together under the same sub directory
-// ("<base_dir>/<sub_dir>"). On the other hand, key-value storage won't be
-// branched and it will be stored under <base_dir>.
-//
// The following 4 methods are helper functions to get the correct path of
-// metadata/bucket/entry/key-value storages, according to the given base
-// directory and sub directory.
-std::string GetMetadataFilePath(std::string_view base_dir,
- std::string_view sub_dir) {
- return absl_ports::StrCat(base_dir, "/", sub_dir, "/",
- PersistentHashMap::kFilePrefix, ".m");
+// metadata/bucket/entry/key-value storages, according to the given working
+// directory path.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".m");
}
-std::string GetBucketStorageFilePath(std::string_view base_dir,
- std::string_view sub_dir) {
- return absl_ports::StrCat(base_dir, "/", sub_dir, "/",
- PersistentHashMap::kFilePrefix, ".b");
+std::string GetBucketStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".b");
}
-std::string GetEntryStorageFilePath(std::string_view base_dir,
- std::string_view sub_dir) {
- return absl_ports::StrCat(base_dir, "/", sub_dir, "/",
- PersistentHashMap::kFilePrefix, ".e");
+std::string GetEntryStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
+ ".e");
}
-std::string GetKeyValueStorageFilePath(std::string_view base_dir) {
- return absl_ports::StrCat(base_dir, "/", PersistentHashMap::kFilePrefix,
+std::string GetKeyValueStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix,
".k");
}
@@ -234,29 +135,31 @@ bool PersistentHashMap::Options::IsValid() const {
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::Create(const Filesystem& filesystem,
- std::string_view base_dir, const Options& options) {
+ std::string working_path, Options options) {
if (!options.IsValid()) {
return absl_ports::InvalidArgumentError(
"Invalid PersistentHashMap options");
}
- if (!filesystem.FileExists(
- GetMetadataFilePath(base_dir, kSubDirectory).c_str()) ||
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetBucketStorageFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetEntryStorageFilePath(working_path).c_str()) ||
!filesystem.FileExists(
- GetBucketStorageFilePath(base_dir, kSubDirectory).c_str()) ||
- !filesystem.FileExists(
- GetEntryStorageFilePath(base_dir, kSubDirectory).c_str()) ||
- !filesystem.FileExists(GetKeyValueStorageFilePath(base_dir).c_str())) {
- // TODO: erase all files if missing any.
- return InitializeNewFiles(filesystem, base_dir, options);
- }
- return InitializeExistingFiles(filesystem, base_dir, options);
+ GetKeyValueStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any of them is missing, and reinitialize.
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+ return InitializeNewFiles(filesystem, std::move(working_path),
+ std::move(options));
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path),
+ std::move(options));
}
PersistentHashMap::~PersistentHashMap() {
if (!PersistToDisk().ok()) {
ICING_LOG(WARNING)
- << "Failed to persist hash map to disk while destructing " << base_dir_;
+ << "Failed to persist hash map to disk while destructing "
+ << working_path_;
}
}
@@ -278,7 +181,7 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
ICING_ASSIGN_OR_RETURN(const Entry* entry,
entry_storage_->Get(idx_pair.target_entry_index));
- int32_t kv_len = key.length() + 1 + info()->value_type_size;
+ int32_t kv_len = key.length() + 1 + info().value_type_size;
int32_t value_offset = key.length() + 1;
ICING_ASSIGN_OR_RETURN(
typename FileBackedVector<char>::MutableArrayView mutable_kv_arr,
@@ -286,7 +189,7 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
// It is the same key and value_size is fixed, so we can directly overwrite
// serialized value.
mutable_kv_arr.SetArray(value_offset, reinterpret_cast<const char*>(value),
- info()->value_type_size);
+ info().value_type_size);
return libtextclassifier3::Status::OK;
}
@@ -319,8 +222,8 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key,
ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_));
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found in PersistentHashMap ", working_path_));
}
return CopyEntryValue(idx_pair.target_entry_index, value);
@@ -335,8 +238,8 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_));
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found in PersistentHashMap ", working_path_));
}
ICING_ASSIGN_OR_RETURN(
@@ -375,7 +278,7 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
// Zero out the key value bytes. It is necessary for iterator to iterate
// through kv_storage and handle deleted keys properly.
- int32_t kv_len = key.length() + 1 + info()->value_type_size;
+ int32_t kv_len = key.length() + 1 + info().value_type_size;
ICING_RETURN_IF_ERROR(kv_storage_->Set(
mutable_target_entry.Get().key_value_index(), kv_len, '\0'));
@@ -383,23 +286,7 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex);
mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex);
- ++(info()->num_deleted_entries);
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status PersistentHashMap::PersistToDisk() {
- ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk());
- ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk());
- ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk());
-
- ICING_RETURN_IF_ERROR(UpdateChecksums(crcs(), info(), bucket_storage_.get(),
- entry_storage_.get(),
- kv_storage_.get()));
- // Changes should have been applied to the underlying file when using
- // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
- // extra safety step to ensure they are written out.
- ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
+ ++(info().num_deleted_entries);
return libtextclassifier3::Status::OK;
}
@@ -415,8 +302,7 @@ libtextclassifier3::StatusOr<int64_t> PersistentHashMap::GetDiskUsage() const {
int64_t total = bucket_storage_disk_usage + entry_storage_disk_usage +
kv_storage_disk_usage;
Filesystem::IncrementByOrSetInvalid(
- filesystem_->GetDiskUsage(
- GetMetadataFilePath(base_dir_, kSubDirectory).c_str()),
+ filesystem_.GetDiskUsage(GetMetadataFilePath(working_path_).c_str()),
&total);
if (total < 0 || total == Filesystem::kBadFileSize) {
@@ -438,23 +324,15 @@ libtextclassifier3::StatusOr<int64_t> PersistentHashMap::GetElementsSize()
kv_storage_elements_size;
}
-libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeChecksum() {
- Crcs* crcs_ptr = crcs();
- ICING_RETURN_IF_ERROR(UpdateChecksums(crcs_ptr, info(), bucket_storage_.get(),
- entry_storage_.get(),
- kv_storage_.get()));
- return Crc32(crcs_ptr->all_crc);
-}
-
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
- std::string_view base_dir,
- const Options& options) {
- // Create directory.
- const std::string dir_path = absl_ports::StrCat(base_dir, "/", kSubDirectory);
- if (!filesystem.CreateDirectoryRecursively(dir_path.c_str())) {
+ std::string&& working_path,
+ Options&& options) {
+ // PersistentHashMap uses working_path as working directory path.
+ // Create working directory.
+ if (!filesystem.CreateDirectory(working_path.c_str())) {
return absl_ports::InternalError(
- absl_ports::StrCat("Failed to create directory: ", dir_path));
+ absl_ports::StrCat("Failed to create directory: ", working_path));
}
int32_t max_num_buckets_required =
@@ -469,7 +347,7 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
FileBackedVector<Bucket>::Create(
- filesystem, GetBucketStorageFilePath(base_dir, kSubDirectory),
+ filesystem, GetBucketStorageFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
pre_mapping_mmap_size));
@@ -477,75 +355,77 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries;
max_file_size =
pre_mapping_mmap_size + FileBackedVector<Entry>::Header::kHeaderSize;
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<FileBackedVector<Entry>> entry_storage,
- FileBackedVector<Entry>::Create(
- filesystem, GetEntryStorageFilePath(base_dir, kSubDirectory),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem, GetEntryStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, pre_mapping_mmap_size));
// Initialize kv_storage
pre_mapping_mmap_size =
options.average_kv_byte_size * options.max_num_entries;
max_file_size =
pre_mapping_mmap_size + FileBackedVector<char>::Header::kHeaderSize;
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<char>> kv_storage,
- FileBackedVector<char>::Create(
- filesystem, GetKeyValueStorageFilePath(base_dir),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
- max_file_size, pre_mapping_mmap_size));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetKeyValueStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ pre_mapping_mmap_size));
// Initialize buckets.
ICING_RETURN_IF_ERROR(bucket_storage->Set(
/*idx=*/0, /*len=*/options.init_num_buckets, Bucket()));
ICING_RETURN_IF_ERROR(bucket_storage->PersistToDisk());
- // Create and initialize new info
- Info new_info;
- new_info.version = kVersion;
- new_info.value_type_size = options.value_type_size;
- new_info.max_load_factor_percent = options.max_load_factor_percent;
- new_info.num_deleted_entries = 0;
- new_info.num_deleted_key_value_bytes = 0;
-
- // Compute checksums
- Crcs new_crcs;
- ICING_RETURN_IF_ERROR(UpdateChecksums(&new_crcs, &new_info,
- bucket_storage.get(),
- entry_storage.get(), kv_storage.get()));
-
- const std::string metadata_file_path =
- GetMetadataFilePath(base_dir, kSubDirectory);
- // Write new metadata file
- ICING_RETURN_IF_ERROR(WriteMetadata(filesystem, metadata_file_path.c_str(),
- &new_crcs, &new_info));
-
- // Mmap the content of the crcs and info.
- ICING_ASSIGN_OR_RETURN(MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(
- filesystem, metadata_file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap(
- /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info)));
-
- return std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
- filesystem, base_dir, options, std::move(metadata_mmapped_file),
- std::move(bucket_storage), std::move(entry_storage),
- std::move(kv_storage)));
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, std::move(working_path), std::move(options),
+ std::move(metadata_mmapped_file), std::move(bucket_storage),
+ std::move(entry_storage), std::move(kv_storage)));
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_persistent_hash_map->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.value_type_size = new_persistent_hash_map->options_.value_type_size;
+ info_ref.max_load_factor_percent =
+ new_persistent_hash_map->options_.max_load_factor_percent;
+ info_ref.num_deleted_entries = 0;
+ info_ref.num_deleted_key_value_bytes = 0;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_persistent_hash_map->InitializeNewStorage());
+
+ return new_persistent_hash_map;
}
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
- std::string_view base_dir,
- const Options& options) {
- // Mmap the content of the crcs and info.
+ std::string&& working_path,
+ Options&& options) {
+ // Initialize metadata file
ICING_ASSIGN_OR_RETURN(
MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(
- filesystem, GetMetadataFilePath(base_dir, kSubDirectory),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap(
- /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info)));
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
+ return absl_ports::FailedPreconditionError("Incorrect metadata file size");
+ }
int32_t max_num_buckets_required = CalculateNumBucketsRequired(
options.max_num_entries, options.max_load_factor_percent);
@@ -557,7 +437,7 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
FileBackedVector<Bucket>::Create(
- filesystem, GetBucketStorageFilePath(base_dir, kSubDirectory),
+ filesystem, GetBucketStorageFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
pre_mapping_mmap_size));
@@ -565,77 +445,113 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries;
max_file_size =
pre_mapping_mmap_size + FileBackedVector<Entry>::Header::kHeaderSize;
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<FileBackedVector<Entry>> entry_storage,
- FileBackedVector<Entry>::Create(
- filesystem, GetEntryStorageFilePath(base_dir, kSubDirectory),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
- pre_mapping_mmap_size));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem, GetEntryStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, pre_mapping_mmap_size));
// Initialize kv_storage
pre_mapping_mmap_size =
options.average_kv_byte_size * options.max_num_entries;
max_file_size =
pre_mapping_mmap_size + FileBackedVector<char>::Header::kHeaderSize;
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<char>> kv_storage,
- FileBackedVector<char>::Create(
- filesystem, GetKeyValueStorageFilePath(base_dir),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
- max_file_size, pre_mapping_mmap_size));
-
- Crcs* crcs_ptr = reinterpret_cast<Crcs*>(
- metadata_mmapped_file.mutable_region() + Crcs::kFileOffset);
- Info* info_ptr = reinterpret_cast<Info*>(
- metadata_mmapped_file.mutable_region() + Info::kFileOffset);
-
- // Value type size should be consistent.
- if (options.value_type_size != info_ptr->value_type_size) {
- return absl_ports::FailedPreconditionError("Incorrect value type size");
- }
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem, GetKeyValueStorageFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ pre_mapping_mmap_size));
- // Current # of entries should not exceed options.max_num_entries
- // We compute max_file_size of 3 storages by options.max_num_entries. Since we
- // won't recycle space of deleted entries (and key-value bytes), they're still
- // occupying space in storages. Even if # of "active" entries doesn't exceed
- // options.max_num_entries, the new kvp to be inserted still potentially
- // exceeds max_file_size.
- // Therefore, we should use entry_storage->num_elements() instead of # of
+ // Create instance.
+ auto persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, std::move(working_path), std::move(options),
+ std::move(metadata_mmapped_file), std::move(bucket_storage),
+ std::move(entry_storage), std::move(kv_storage)));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(persistent_hash_map->InitializeExistingStorage());
+
+ // Validate other values of info and options.
+ // Current # of entries should not exceed options_.max_num_entries
+ // We compute max_file_size of 3 storages by options_.max_num_entries. Since
+ // we won't recycle space of deleted entries (and key-value bytes), they're
+ // still occupying space in storages. Even if # of "active" entries doesn't
+ // exceed options_.max_num_entries, the new kvp to be inserted still
+ // potentially exceeds max_file_size.
+ // Therefore, we should use entry_storage_->num_elements() instead of # of
// "active" entries
- // (i.e. entry_storage->num_elements() - info_ptr->num_deleted_entries) to
+ // (i.e. entry_storage_->num_elements() - info().num_deleted_entries) to
// check. This feature avoids storages being grown extremely large when there
// are many Delete() and Put() operations.
- if (entry_storage->num_elements() > options.max_num_entries) {
+ if (persistent_hash_map->entry_storage_->num_elements() >
+ persistent_hash_map->options_.max_num_entries) {
return absl_ports::FailedPreconditionError(
"Current # of entries exceeds max num entries");
}
- // Validate checksums of info and 3 storages.
- ICING_RETURN_IF_ERROR(
- ValidateChecksums(crcs_ptr, info_ptr, bucket_storage.get(),
- entry_storage.get(), kv_storage.get()));
+ // Magic should be the same.
+ if (persistent_hash_map->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError(
+ "PersistentHashMap header magic mismatch");
+ }
+
+ // Value type size should be consistent.
+ if (persistent_hash_map->options_.value_type_size !=
+ persistent_hash_map->info().value_type_size) {
+ return absl_ports::FailedPreconditionError("Incorrect value type size");
+ }
// Allow max_load_factor_percent_ change.
- if (options.max_load_factor_percent != info_ptr->max_load_factor_percent) {
+ if (persistent_hash_map->options_.max_load_factor_percent !=
+ persistent_hash_map->info().max_load_factor_percent) {
ICING_VLOG(2) << "Changing max_load_factor_percent from "
- << info_ptr->max_load_factor_percent << " to "
- << options.max_load_factor_percent;
+ << persistent_hash_map->info().max_load_factor_percent
+ << " to "
+ << persistent_hash_map->options_.max_load_factor_percent;
+
+ persistent_hash_map->info().max_load_factor_percent =
+ persistent_hash_map->options_.max_load_factor_percent;
+ ICING_RETURN_IF_ERROR(
+ persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false));
- info_ptr->max_load_factor_percent = options.max_load_factor_percent;
- crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get();
- crcs_ptr->all_crc = crcs_ptr->component_crcs.ComputeChecksum().Get();
- ICING_RETURN_IF_ERROR(metadata_mmapped_file.PersistToDisk());
+ ICING_RETURN_IF_ERROR(persistent_hash_map->PersistToDisk());
}
- auto persistent_hash_map =
- std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
- filesystem, base_dir, options, std::move(metadata_mmapped_file),
- std::move(bucket_storage), std::move(entry_storage),
- std::move(kv_storage)));
- ICING_RETURN_IF_ERROR(
- persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false));
return persistent_hash_map;
}
+libtextclassifier3::Status PersistentHashMap::PersistStoragesToDisk() {
+ ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk());
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status PersistentHashMap::PersistMetadataToDisk() {
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeInfoChecksum() {
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+PersistentHashMap::ComputeStoragesChecksum() {
+ // Compute crcs
+ ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
+ bucket_storage_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc,
+ entry_storage_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage_->ComputeChecksum());
+
+ return Crc32(bucket_storage_crc.Get() ^ entry_storage_crc.Get() ^
+ kv_storage_crc.Get());
+}
+
libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair>
PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx,
std::string_view key) const {
@@ -674,7 +590,7 @@ libtextclassifier3::Status PersistentHashMap::CopyEntryValue(
ICING_ASSIGN_OR_RETURN(const char* kv_arr,
kv_storage_->Get(entry->key_value_index()));
int32_t value_offset = strlen(kv_arr) + 1;
- memcpy(value, kv_arr + value_offset, info()->value_type_size);
+ memcpy(value, kv_arr + value_offset, info().value_type_size);
return libtextclassifier3::Status::OK;
}
@@ -702,7 +618,7 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
// Append new key value.
int32_t new_kv_idx = kv_storage_->num_elements();
- int32_t kv_len = key.size() + 1 + info()->value_type_size;
+ int32_t kv_len = key.size() + 1 + info().value_type_size;
int32_t value_offset = key.size() + 1;
ICING_ASSIGN_OR_RETURN(
typename FileBackedVector<char>::MutableArrayView mutable_new_kv_arr,
@@ -711,7 +627,7 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
mutable_new_kv_arr.SetArray(/*idx=*/key.size(), "\0", 1);
mutable_new_kv_arr.SetArray(/*idx=*/value_offset,
reinterpret_cast<const char*>(value),
- info()->value_type_size);
+ info().value_type_size);
// Append new entry.
int32_t new_entry_idx = entry_storage_->num_elements();
@@ -727,7 +643,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
int32_t new_num_bucket = bucket_storage_->num_elements();
while (new_num_bucket <= Bucket::kMaxNumBuckets / 2 &&
size() > static_cast<int64_t>(new_num_bucket) *
- info()->max_load_factor_percent / 100) {
+ info().max_load_factor_percent / 100) {
new_num_bucket *= 2;
}
@@ -764,7 +680,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
entry_storage_->TruncateTo(entry_idx);
}
- info()->num_deleted_entries = 0;
+ info().num_deleted_entries = 0;
return libtextclassifier3::Status::OK;
}
@@ -774,7 +690,7 @@ bool PersistentHashMap::Iterator::Advance() {
// key value pair. In the first round (after construction), curr_key_len_
// is 0, so don't jump over anything.
if (curr_key_len_ != 0) {
- curr_kv_idx_ += curr_key_len_ + 1 + map_->info()->value_type_size;
+ curr_kv_idx_ += curr_key_len_ + 1 + map_->info().value_type_size;
curr_key_len_ = 0;
}
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
index 57fa070..a6d14bb 100644
--- a/icing/file/persistent-hash-map.h
+++ b/icing/file/persistent-hash-map.h
@@ -24,6 +24,7 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -34,7 +35,7 @@ namespace lib {
// Key and value can be any type, but callers should serialize key/value by
// themselves and pass raw bytes into the hash map, and the serialized key
// should not contain termination character '\0'.
-class PersistentHashMap {
+class PersistentHashMap : public PersistentStorage {
public:
// For iterating through persistent hash map. The order is not guaranteed.
//
@@ -80,45 +81,15 @@ class PersistentHashMap {
friend class PersistentHashMap;
};
- // Crcs and Info will be written into the metadata file.
- // File layout: <Crcs><Info>
- // Crcs
- struct Crcs {
- static constexpr int32_t kFileOffset = 0;
-
- struct ComponentCrcs {
- uint32_t info_crc;
- uint32_t bucket_storage_crc;
- uint32_t entry_storage_crc;
- uint32_t kv_storage_crc;
-
- bool operator==(const ComponentCrcs& other) const {
- return info_crc == other.info_crc &&
- bucket_storage_crc == other.bucket_storage_crc &&
- entry_storage_crc == other.entry_storage_crc &&
- kv_storage_crc == other.kv_storage_crc;
- }
-
- Crc32 ComputeChecksum() const {
- return Crc32(std::string_view(reinterpret_cast<const char*>(this),
- sizeof(ComponentCrcs)));
- }
- } __attribute__((packed));
-
- bool operator==(const Crcs& other) const {
- return all_crc == other.all_crc && component_crcs == other.component_crcs;
- }
-
- uint32_t all_crc;
- ComponentCrcs component_crcs;
- } __attribute__((packed));
- static_assert(sizeof(Crcs) == 20, "");
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
- // Info
struct Info {
- static constexpr int32_t kFileOffset = static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMagic = 0x653afd7b;
- int32_t version;
+ int32_t magic;
int32_t value_type_size;
int32_t max_load_factor_percent;
int32_t num_deleted_entries;
@@ -131,6 +102,9 @@ class PersistentHashMap {
} __attribute__((packed));
static_assert(sizeof(Info) == 20, "");
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 32, "");
+
// Bucket
class Bucket {
public:
@@ -270,33 +244,48 @@ class PersistentHashMap {
int32_t init_num_buckets;
};
- static constexpr int32_t kVersion = 1;
-
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
static constexpr std::string_view kFilePrefix = "persistent_hash_map";
- // Only metadata, bucket, entry files are stored under this sub-directory, for
- // rehashing branching use.
- static constexpr std::string_view kSubDirectory = "dynamic";
// Creates a new PersistentHashMap to read/write/delete key value pairs.
//
// filesystem: Object to make system level calls
- // base_dir: Specifies the directory for all persistent hash map related
- // sub-directory and files to be stored. If base_dir doesn't exist,
- // then PersistentHashMap will automatically create it. If files
- // exist, then it will initialize the hash map from existing files.
+ // working_path: Specifies the working path for PersistentStorage.
+ // PersistentHashMap uses working path as working directory and
+ // all related files will be stored under this directory. It
+ // takes full ownership of working_path_, including
+ // creation/deletion. It is the caller's responsibility to
+ // specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the
+ // caller has the ownership for the parent directory of
+ // working_path_, and it is responsible for parent directory
+ // creation/deletion. See PersistentStorage for more details
+ // about the concept of working_path.
// options: Options instance.
//
// Returns:
// INVALID_ARGUMENT_ERROR if any value in options is invalid.
// FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
- // checksum.
+ // checksum or any other inconsistency.
// INTERNAL_ERROR on I/O errors.
// Any FileBackedVector errors.
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
- Create(const Filesystem& filesystem, std::string_view base_dir,
- const Options& options);
+ Create(const Filesystem& filesystem, std::string working_path,
+ Options options);
- ~PersistentHashMap();
+ // Deletes PersistentHashMap under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ std::string working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
+ ~PersistentHashMap() override;
// Update a key value pair. If key does not exist, then insert (key, value)
// into the storage. Otherwise overwrite the value into the storage.
@@ -349,13 +338,6 @@ class PersistentHashMap {
Iterator GetIterator() const { return Iterator(this); }
- // Flushes content to underlying files.
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistToDisk();
-
// Calculates and returns the disk usage (metadata + 3 storages total file
// size) in bytes.
//
@@ -374,16 +356,8 @@ class PersistentHashMap {
// INTERNAL_ERROR on I/O error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
- // Updates all checksums of the persistent hash map components and returns
- // all_crc.
- //
- // Returns:
- // Crc of all components (all_crc) on success
- // INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
-
int32_t size() const {
- return entry_storage_->num_elements() - info()->num_deleted_entries;
+ return entry_storage_->num_elements() - info().num_deleted_entries;
}
bool empty() const { return size() == 0; }
@@ -402,14 +376,14 @@ class PersistentHashMap {
};
explicit PersistentHashMap(
- const Filesystem& filesystem, std::string_view base_dir,
- const Options& options, MemoryMappedFile&& metadata_mmapped_file,
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options, MemoryMappedFile&& metadata_mmapped_file,
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
std::unique_ptr<FileBackedVector<Entry>> entry_storage,
std::unique_ptr<FileBackedVector<char>> kv_storage)
- : filesystem_(&filesystem),
- base_dir_(base_dir),
- options_(options),
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
+ options_(std::move(options)),
metadata_mmapped_file_(std::make_unique<MemoryMappedFile>(
std::move(metadata_mmapped_file))),
bucket_storage_(std::move(bucket_storage)),
@@ -417,12 +391,40 @@ class PersistentHashMap {
kv_storage_(std::move(kv_storage)) {}
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
- InitializeNewFiles(const Filesystem& filesystem, std::string_view base_dir,
- const Options& options);
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ Options&& options);
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
InitializeExistingFiles(const Filesystem& filesystem,
- std::string_view base_dir, const Options& options);
+ std::string&& working_path, Options&& options);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk() override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk() override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+
+ // Computes and returns all storages checksum. Checksums of bucket_storage_,
+ // entry_storage_ and kv_storage_ will be combined together by XOR.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
// Find the index of the target entry (that contains the key) from a bucket
// (specified by bucket index). Also return the previous entry index, since
@@ -468,23 +470,25 @@ class PersistentHashMap {
// Any FileBackedVector errors
libtextclassifier3::Status RehashIfNecessary(bool force_rehash);
- Crcs* crcs() {
- return reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
- Crcs::kFileOffset);
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
}
- Info* info() {
- return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
- Info::kFileOffset);
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
}
- const Info* info() const {
- return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
- Info::kFileOffset);
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
}
- const Filesystem* filesystem_;
- std::string base_dir_;
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
Options options_;
diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc
index 8fde4a8..6e9a41b 100644
--- a/icing/file/persistent-hash-map_test.cc
+++ b/icing/file/persistent-hash-map_test.cc
@@ -24,7 +24,9 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/crc32.h"
@@ -34,6 +36,7 @@ using ::testing::Eq;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::Key;
using ::testing::Lt;
using ::testing::Not;
@@ -48,7 +51,7 @@ namespace lib {
namespace {
using Bucket = PersistentHashMap::Bucket;
-using Crcs = PersistentHashMap::Crcs;
+using Crcs = PersistentStorage::Crcs;
using Entry = PersistentHashMap::Entry;
using Info = PersistentHashMap::Info;
using Options = PersistentHashMap::Options;
@@ -59,7 +62,11 @@ static constexpr int32_t kTestInitNumBuckets = 1;
class PersistentHashMapTest : public ::testing::Test {
protected:
void SetUp() override {
- base_dir_ = GetTestTempDir() + "/persistent_hash_map_test";
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/persistent_hash_map_test";
}
void TearDown() override {
@@ -93,6 +100,7 @@ class PersistentHashMapTest : public ::testing::Test {
Filesystem filesystem_;
std::string base_dir_;
+ std::string working_path_;
};
TEST_F(PersistentHashMapTest, OptionsInvalidValueTypeSize) {
@@ -185,11 +193,11 @@ TEST_F(PersistentHashMapTest,
EXPECT_FALSE(options.IsValid());
}
-TEST_F(PersistentHashMapTest, InvalidBaseDir) {
- EXPECT_THAT(
- PersistentHashMap::Create(filesystem_, "/dev/null",
- Options(/*value_type_size_in=*/sizeof(int))),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+TEST_F(PersistentHashMapTest, InvalidWorkingPath) {
+ EXPECT_THAT(PersistentHashMap::Create(
+ filesystem_, "/dev/null/persistent_hash_map_test",
+ Options(/*value_type_size_in=*/sizeof(int))),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) {
@@ -197,16 +205,16 @@ TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) {
ASSERT_FALSE(invalid_options.IsValid());
EXPECT_THAT(
- PersistentHashMap::Create(filesystem_, base_dir_, invalid_options),
+ PersistentHashMap::Create(filesystem_, working_path_, invalid_options),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(PersistentHashMapTest, InitializeNewFiles) {
{
- ASSERT_FALSE(filesystem_.DirectoryExists(base_dir_.c_str()));
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
+ PersistentHashMap::Create(filesystem_, working_path_,
Options(/*value_type_size_in=*/sizeof(int))));
EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty()));
@@ -215,17 +223,16 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) {
// Metadata file should be initialized correctly for both info and crcs
// sections.
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
// Check info section
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
- EXPECT_THAT(info.version, Eq(PersistentHashMap::kVersion));
+ PersistentHashMap::kInfoMetadataFileOffset));
+ EXPECT_THAT(info.magic, Eq(Info::kMagic));
EXPECT_THAT(info.value_type_size, Eq(sizeof(int)));
EXPECT_THAT(info.max_load_factor_percent,
Eq(Options::kDefaultMaxLoadFactorPercent));
@@ -235,13 +242,10 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) {
// Check crcs section
Crcs crcs;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ PersistentHashMap::kCrcsMetadataFileOffset));
// # of elements in bucket_storage should be 1, so it should have non-zero
- // crc value.
- EXPECT_THAT(crcs.component_crcs.bucket_storage_crc, Not(Eq(0)));
- // Other empty file backed vectors should have 0 crc value.
- EXPECT_THAT(crcs.component_crcs.entry_storage_crc, Eq(0));
- EXPECT_THAT(crcs.component_crcs.kv_storage_crc, Eq(0));
+ // all storages crc value.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Not(Eq(0)));
EXPECT_THAT(crcs.component_crcs.info_crc,
Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
sizeof(Info)))
@@ -260,7 +264,7 @@ TEST_F(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -279,7 +283,7 @@ TEST_F(PersistentHashMapTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/1,
@@ -301,7 +305,7 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map->num_buckets(),
Eq(original_init_num_buckets));
@@ -314,7 +318,7 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// # of buckets should still be the original value.
EXPECT_THAT(persistent_hash_map->num_buckets(),
Eq(original_init_num_buckets));
@@ -327,7 +331,7 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
@@ -343,7 +347,7 @@ TEST_F(PersistentHashMapTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
- EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options),
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -353,7 +357,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map1,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data()));
@@ -374,7 +378,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map2,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map2, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "b"), IsOkAndHolds(2));
@@ -387,7 +391,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
@@ -407,7 +411,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
// we should be able to get the same contents.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
@@ -415,12 +419,66 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) {
}
TEST_F(PersistentHashMapTest,
+ InitializeExistingFilesWithDifferentMagicShouldFail) {
+ Options options(/*value_type_size_in=*/sizeof(int));
+
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, working_path_, options));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ {
+ // Manually change the stored magic and update checksums
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ PersistentHashMap::kCrcsMetadataFileOffset));
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ PersistentHashMap::kInfoMetadataFileOffset));
+
+ // Manually change magic and update checksums.
+ info.magic += kCorruptedValueOffset;
+ crcs.component_crcs.info_crc = info.ComputeChecksum().Get();
+ crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
+ }
+
+ {
+ // Attempt to create the persistent hash map with different magic. This
+ // should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
+ persistent_hash_map_or =
+ PersistentHashMap::Create(filesystem_, working_path_, options);
+ EXPECT_THAT(persistent_hash_map_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("PersistentHashMap header magic mismatch"));
+ }
+}
+
+TEST_F(PersistentHashMapTest,
InitializeExistingFilesWithDifferentValueTypeSizeShouldFail) {
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
+ PersistentHashMap::Create(filesystem_, working_path_,
Options(/*value_type_size_in=*/sizeof(int))));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
@@ -433,7 +491,7 @@ TEST_F(PersistentHashMapTest,
ASSERT_THAT(sizeof(char), Not(Eq(sizeof(int))));
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or = PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(/*value_type_size_in=*/sizeof(char)));
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
@@ -449,7 +507,7 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
@@ -461,7 +519,7 @@ TEST_F(PersistentHashMapTest,
options.max_num_entries = 1;
ASSERT_TRUE(options.IsValid());
- EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options),
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -478,7 +536,7 @@ TEST_F(PersistentHashMapTest,
options.max_num_entries = 1;
ASSERT_TRUE(options.IsValid());
- EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options),
+ EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
}
@@ -490,26 +548,26 @@ TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Crcs crcs;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ PersistentHashMap::kCrcsMetadataFileOffset));
// Manually corrupt all_crc
crcs.all_crc += kCorruptedValueOffset;
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
metadata_sfd.reset();
{
@@ -517,11 +575,11 @@ TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) {
// corrupted all_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
- HasSubstr("Invalid all crc for PersistentHashMap"));
+ HasSubstr("Invalid all crc"));
}
}
@@ -533,173 +591,169 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
+ PersistentHashMap::kInfoMetadataFileOffset));
// Modify info, but don't update the checksum. This would be similar to
// corruption of info.
info.num_deleted_entries += kCorruptedValueOffset;
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Info::kFileOffset, &info,
- sizeof(Info)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ PersistentHashMap::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
{
// Attempt to create the persistent hash map with info that doesn't match
// its checksum and confirm that it fails.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
- HasSubstr("Invalid info crc for PersistentHashMap"));
+ HasSubstr("Invalid info crc"));
}
}
TEST_F(PersistentHashMapTest,
- InitializeExistingFilesWithWrongBucketStorageCrc) {
+ InitializeExistingFilesWithCorruptedBucketStorage) {
Options options(/*value_type_size_in=*/sizeof(int));
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Update bucket storage manually.
+ const std::string bucket_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".b");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, bucket_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ bucket_storage->ComputeChecksum());
+ ICING_ASSERT_OK(bucket_storage->Append(Bucket()));
+ ICING_ASSERT_OK(bucket_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ bucket_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt bucket_storage_crc
- crcs.component_crcs.bucket_storage_crc += kCorruptedValueOffset;
- crcs.all_crc = Crc32(std::string_view(
- reinterpret_cast<const char*>(&crcs.component_crcs),
- sizeof(Crcs::ComponentCrcs)))
- .Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the persistent hash map after the bucket storage was
// modified manually, so it no longer matches storages_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- persistent_hash_map_or.status().error_message(),
- HasSubstr("Mismatch crc with PersistentHashMap bucket storage"));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
}
}
-TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongEntryStorageCrc) {
+TEST_F(PersistentHashMapTest,
+ InitializeExistingFilesWithCorruptedEntryStorage) {
Options options(/*value_type_size_in=*/sizeof(int));
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Update entry storage manually.
+ const std::string entry_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".e");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Entry>> entry_storage,
+ FileBackedVector<Entry>::Create(
+ filesystem_, entry_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, entry_storage->ComputeChecksum());
+ ICING_ASSERT_OK(entry_storage->Append(
+ Entry(/*key_value_index=*/-1, /*next_entry_index=*/-1)));
+ ICING_ASSERT_OK(entry_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, entry_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt entry_storage_crc
- crcs.component_crcs.entry_storage_crc += kCorruptedValueOffset;
- crcs.all_crc = Crc32(std::string_view(
- reinterpret_cast<const char*>(&crcs.component_crcs),
- sizeof(Crcs::ComponentCrcs)))
- .Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the persistent hash map after the entry storage was
// modified manually, so it no longer matches storages_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(persistent_hash_map_or.status().error_message(),
- HasSubstr("Mismatch crc with PersistentHashMap entry storage"));
+ HasSubstr("Invalid storages crc"));
}
}
TEST_F(PersistentHashMapTest,
- InitializeExistingFilesWithWrongKeyValueStorageCrc) {
+ InitializeExistingFilesWithCorruptedKeyValueStorage) {
Options options(/*value_type_size_in=*/sizeof(int));
{
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Update kv storage manually.
+ const std::string kv_storage_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".k");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> kv_storage,
+ FileBackedVector<char>::Create(
+ filesystem_, kv_storage_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, kv_storage->ComputeChecksum());
+ ICING_ASSERT_OK(kv_storage->Append('z'));
+ ICING_ASSERT_OK(kv_storage->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, kv_storage->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt kv_storage_crc
- crcs.component_crcs.kv_storage_crc += kCorruptedValueOffset;
- crcs.all_crc = Crc32(std::string_view(
- reinterpret_cast<const char*>(&crcs.component_crcs),
- sizeof(Crcs::ComponentCrcs)))
- .Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the persistent hash map after the kv storage was
// modified manually, so it no longer matches storages_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
persistent_hash_map_or =
- PersistentHashMap::Create(filesystem_, base_dir_, options);
+ PersistentHashMap::Create(filesystem_, working_path_, options);
EXPECT_THAT(persistent_hash_map_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- persistent_hash_map_or.status().error_message(),
- HasSubstr("Mismatch crc with PersistentHashMap key value storage"));
+ EXPECT_THAT(persistent_hash_map_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
}
}
@@ -716,7 +770,7 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
@@ -739,7 +793,7 @@ TEST_F(PersistentHashMapTest,
// Also verify all entries should remain unchanged.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
@@ -748,15 +802,14 @@ TEST_F(PersistentHashMapTest,
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/",
- PersistentHashMap::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", PersistentHashMap::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
+ PersistentHashMap::kInfoMetadataFileOffset));
EXPECT_THAT(info.max_load_factor_percent,
Eq(options.max_load_factor_percent));
@@ -765,7 +818,7 @@ TEST_F(PersistentHashMapTest,
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
@@ -786,7 +839,7 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
@@ -816,7 +869,7 @@ TEST_F(PersistentHashMapTest,
// should remain the same.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(prev_num_buckets));
@@ -834,7 +887,7 @@ TEST_F(PersistentHashMapTest,
// exceeds the limit.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_, options));
+ PersistentHashMap::Create(filesystem_, working_path_, options));
// After changing max_load_factor_percent, there should be rehashing and the
// new loading should not be greater than the new max load factor.
@@ -856,7 +909,7 @@ TEST_F(PersistentHashMapTest, PutAndGet) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -892,7 +945,7 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -925,7 +978,7 @@ TEST_F(PersistentHashMapTest, ShouldRehash) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -961,7 +1014,7 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -987,7 +1040,7 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1016,7 +1069,7 @@ TEST_F(PersistentHashMapTest, Delete) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1067,7 +1120,7 @@ TEST_F(PersistentHashMapTest, DeleteMultiple) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1125,7 +1178,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1163,7 +1216,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1200,7 +1253,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1238,7 +1291,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1262,7 +1315,7 @@ TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/1,
@@ -1297,7 +1350,7 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
+ PersistentHashMap::Create(filesystem_, working_path_,
Options(/*value_type_size_in=*/sizeof(int))));
const char invalid_key[] = "a\0bc";
@@ -1319,7 +1372,7 @@ TEST_F(PersistentHashMapTest, EmptyHashMapIterator) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1336,7 +1389,7 @@ TEST_F(PersistentHashMapTest, Iterator) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1363,7 +1416,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1392,7 +1445,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1421,7 +1474,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
@@ -1450,7 +1503,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(
/*value_type_size_in=*/sizeof(int),
/*max_num_entries_in=*/Entry::kMaxNumEntries,
diff --git a/icing/file/persistent-storage.cc b/icing/file/persistent-storage.cc
new file mode 100644
index 0000000..9a595ef
--- /dev/null
+++ b/icing/file/persistent-storage.cc
@@ -0,0 +1,55 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/persistent-storage.h"
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/core/icing-string-util.h"
+
+namespace icing {
+namespace lib {
+
+// Removes the storage located at working_path. The deletion primitive is
+// chosen by working_path_type: a single file is deleted directly, a directory
+// is deleted recursively, and a dummy working path is left untouched.
+//
+// Returns OK on success, INTERNAL if the filesystem deletion fails, and
+// INVALID_ARGUMENT if working_path_type is not one of the known enumerators.
+/* static */ libtextclassifier3::Status PersistentStorage::Discard(
+    const Filesystem& filesystem, const std::string& working_path,
+    WorkingPathType working_path_type) {
+  // Dummy working paths have nothing on disk to clean up.
+  if (working_path_type == WorkingPathType::kDummy) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  if (working_path_type == WorkingPathType::kSingleFile) {
+    const bool file_deleted = filesystem.DeleteFile(working_path.c_str());
+    if (file_deleted) {
+      return libtextclassifier3::Status::OK;
+    }
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to delete PersistentStorage file: ", working_path));
+  }
+
+  if (working_path_type == WorkingPathType::kDirectory) {
+    const bool dir_deleted =
+        filesystem.DeleteDirectoryRecursively(working_path.c_str());
+    if (dir_deleted) {
+      return libtextclassifier3::Status::OK;
+    }
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to delete PersistentStorage directory: ", working_path));
+  }
+
+  // Only reachable for a value outside the declared WorkingPathType set.
+  return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+      "Unknown working path type %d for PersistentStorage %s",
+      static_cast<int>(working_path_type), working_path.c_str()));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h
new file mode 100644
index 0000000..a70c9e9
--- /dev/null
+++ b/icing/file/persistent-storage.h
@@ -0,0 +1,338 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_PERSISTENT_STORAGE_H_
+#define ICING_FILE_PERSISTENT_STORAGE_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// PersistentStorage: an abstract class for all persistent data structures.
+// - It provides some common persistent file methods, e.g. PersistToDisk.
+// - It encapsulates most of the checksum handling logic (including update and
+// validation).
+//
+// Terminology:
+// - Crcs: checksum section
+// - Info: (custom) information for derived class
+// - Metadata: Crcs + Info
+//
+// Usually a persistent data structure will have its own custom Info and
+// storages (single or composite storages) definition. To create a new
+// persistent data structure via PersistentStorage:
+// - Decide what type the working path is (single file or directory). See
+// working_path_ and WorkingPathType for more details.
+// - Create a new class that inherits PersistentStorage:
+// - Declare custom Info and design the metadata section layout.
+// Usually the layout is <Crcs><Info>, and there are 2 common ways to
+// manage metadata section:
+// - Have a separate file for metadata. In this case, the new persistent
+// data structure contains multiple files, so working path should be used
+// as directory path and multiple files will be stored under it. Example:
+// PersistentHashMap.
+// - Have a single file for both metadata and storage data. In this case,
+// the file layout should be <Crcs><Info><Storage Data>, and
+// working path should be used as file path. Example: FileBackedVector.
+// - Handle working path file/directory creation and deletion.
+// PersistentStorage only provides the static Discard() method. The
+// derived class should implement the remaining logic, e.g. working path
+// (file/directory) creation and deciding when to discard the working path
+// and start over with new file(s).
+// - Implement all pure virtual methods:
+// - PersistStoragesToDisk: persist all (composite) storages. In general,
+// the implementation will be calling PersistToDisk for all composite
+// storages.
+// - PersistMetadataToDisk: persist metadata, including Crcs and Info.
+// - If the derived class maintains a concrete Crcs and (custom) Info
+// instance, then it should perform write/pwrite into the metadata
+// section.
+// - If the derived class uses memory-mapped region directly for metadata,
+// then it should call MemoryMappedFile::PersistToDisk.
+// - See crcs() for more details.
+// - ComputeInfoChecksum: compute the checksum for custom Info.
+// - ComputeStoragesChecksum: compute the (combined) checksum for all
+// (composite) storages. In general, the implementation will be calling
+// UpdateChecksums for all composite storages and XOR all checksums.
+// - crcs(): provide the reference for PersistentStorage to write checksums.
+// The derived class can either maintain a concrete Crcs instance, or
+// reinterpret_cast the memory-mapped region to Crcs reference. Either
+// choice is fine as long as PersistMetadataToDisk flushes it to disk
+// correctly.
+// - Call either InitializeNewStorage or InitializeExistingStorage when creating
+// and initializing an instance, depending on initializing new storage or from
+// existing file(s).
+class PersistentStorage {
+ public:
+ enum class WorkingPathType {
+ kSingleFile, // working_path is a single file; Discard() removes it via DeleteFile.
+ kDirectory, // working_path is a directory; Discard() removes it recursively.
+ kDummy, // Discard() is a no-op and returns OK without touching the path.
+ };
+
+ // Crcs and Info will be written into the metadata section. Info is defined by
+ // the actual implementation of each persistent storage. Usually the Metadata
+ // layout is: <Crcs><Info>
+ struct Crcs {
+ struct ComponentCrcs {
+ uint32_t info_crc;
+ uint32_t storages_crc;
+
+ bool operator==(const ComponentCrcs& other) const {
+ return info_crc == other.info_crc && storages_crc == other.storages_crc;
+ }
+
+ Crc32 ComputeChecksum() const {
+ return Crc32(std::string_view(reinterpret_cast<const char*>(this),
+ sizeof(ComponentCrcs)));
+ }
+ } __attribute__((packed));
+
+ bool operator==(const Crcs& other) const {
+ return all_crc == other.all_crc && component_crcs == other.component_crcs;
+ }
+
+ uint32_t all_crc;
+ ComponentCrcs component_crcs;
+ } __attribute__((packed));
+ static_assert(sizeof(Crcs) == 12, ""); // 3 packed uint32_t: all_crc, info_crc, storages_crc.
+
+ // Deletes working_path according to its type.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ // - INVALID_ARGUMENT_ERROR if working_path_type is unknown type
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path,
+ WorkingPathType working_path_type);
+
+ virtual ~PersistentStorage() = default;
+
+ // Initializes new persistent storage. It computes the initial checksums and
+ // writes into the metadata file.
+ //
+ // Note: either InitializeNewStorage or InitializeExistingStorage should be
+ // invoked after creating a PersistentStorage instance before using, otherwise
+ // an uninitialized instance will fail to use persistent storage features,
+ // e.g. PersistToDisk, UpdateChecksums.
+ //
+ // Returns:
+ // - OK on success or already initialized
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status InitializeNewStorage() {
+ if (is_initialized_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal());
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk());
+
+ is_initialized_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Initializes persistent storage from existing file(s).
+ //
+ // It enforces the following check(s):
+ // - Validate checksums.
+ //
+ // Note: either InitializeNewStorage or InitializeExistingStorage should be
+ // invoked after creating a PersistentStorage instance before using.
+ //
+ // Returns:
+ // - OK on success or already initialized
+ // - FAILED_PRECONDITION_ERROR if checksum validation fails.
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status InitializeExistingStorage() {
+ if (is_initialized_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ ICING_RETURN_IF_ERROR(ValidateChecksums());
+
+ is_initialized_ = true;
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Flushes contents to underlying files.
+ // 1) Flushes storages.
+ // 2) Updates all checksums by new data.
+ // 3) Flushes metadata.
+ //
+ // Returns:
+ // - OK on success
+ // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
+ // - Any errors from PersistStoragesToDisk, UpdateChecksums,
+ // PersistMetadataToDisk, depending on actual implementation
+ libtextclassifier3::Status PersistToDisk() {
+ if (!is_initialized_) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "PersistentStorage ", working_path_, " not initialized"));
+ }
+
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk());
+ ICING_RETURN_IF_ERROR(UpdateChecksums());
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk());
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Updates checksums of all components and returns the overall crc (all_crc)
+ // of the persistent storage.
+ //
+ // Returns:
+ // - Overall crc of the persistent storage on success
+ // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksums() {
+ if (!is_initialized_) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "PersistentStorage ", working_path_, " not initialized"));
+ }
+
+ return UpdateChecksumsInternal();
+ }
+
+ protected:
+ explicit PersistentStorage(const Filesystem& filesystem,
+ std::string working_path,
+ WorkingPathType working_path_type)
+ : filesystem_(filesystem),
+ working_path_(std::move(working_path)),
+ working_path_type_(working_path_type),
+ is_initialized_(false) {}
+
+ // Flushes contents of metadata. The implementation should flush Crcs and Info
+ // correctly, depending on whether they're using memory-mapped regions or
+ // concrete instances in the derived class.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::Status PersistMetadataToDisk() = 0;
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::Status PersistStoragesToDisk() = 0;
+
+ // Computes and returns Info checksum.
+ //
+ // Called by UpdateChecksumsInternal and ValidateChecksums.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() = 0;
+
+ // Computes and returns all storages checksum. If there are multiple storages,
+ // usually we XOR their checksums together to a single checksum.
+ //
+ // Called by UpdateChecksumsInternal and ValidateChecksums.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - Any other errors, depending on actual implementation
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() = 0;
+
+ // Returns the Crcs instance reference. The derived class can either own a
+ // concrete Crcs instance, or reinterpret_cast the memory-mapped region to
+ // Crcs reference. PersistMetadataToDisk should flush it to disk correctly.
+ virtual Crcs& crcs() = 0;
+ virtual const Crcs& crcs() const = 0;
+
+ const Filesystem& filesystem_;
+ // Path to the storage. It can be a single file path or a directory path
+ // depending on the implementation of the derived class.
+ //
+ // Note that the derived storage class will take full ownership of
+ // working_path_, including creation/deletion. It is the caller's
+ // responsibility to specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the caller has the
+ // ownership for the parent directory of working_path_, and it is responsible
+ // for parent directory creation/deletion.
+ std::string working_path_;
+ WorkingPathType working_path_type_;
+
+ bool is_initialized_; // Set by InitializeNewStorage / InitializeExistingStorage.
+
+ private:
+ // Updates checksums of all components and returns the overall crc (all_crc)
+ // of the persistent storage. Different from UpdateChecksums, it won't check
+ // if PersistentStorage is initialized or not.
+ //
+ // Returns:
+ // - Overall crc of the persistent storage on success
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal() {
+ Crcs& crcs_ref = crcs();
+ // Compute and update storages + info checksums.
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum());
+ crcs_ref.component_crcs.info_crc = info_crc.Get();
+ crcs_ref.component_crcs.storages_crc = storages_crc.Get();
+
+ // Finally compute and update overall checksum.
+ crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get();
+ return Crc32(crcs_ref.all_crc);
+ }
+
+ // Validates all checksums of the persistent storage.
+ //
+ // Returns:
+ // - OK on success
+ // - FAILED_PRECONDITION_ERROR if any checksum is incorrect.
+ // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
+ // on actual implementation
+ libtextclassifier3::Status ValidateChecksums() {
+ const Crcs& crcs_ref = crcs();
+ if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) {
+ return absl_ports::FailedPreconditionError("Invalid all crc");
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum());
+ if (crcs_ref.component_crcs.info_crc != info_crc.Get()) {
+ return absl_ports::FailedPreconditionError("Invalid info crc");
+ }
+
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum());
+ if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) {
+ return absl_ports::FailedPreconditionError("Invalid storages crc");
+ }
+
+ return libtextclassifier3::Status::OK;
+ }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_PERSISTENT_STORAGE_H_
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index e48e6e0..48e3501 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -64,7 +64,6 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
@@ -79,6 +78,7 @@
#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -971,8 +971,7 @@ PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const {
return absl_ports::NotFoundError("The proto data has been erased.");
}
- google::protobuf::io::ArrayInputStream proto_stream(buf.get(),
- stored_size);
+ google::protobuf::io::ArrayInputStream proto_stream(buf.get(), stored_size);
// Deserialize proto
ProtoT proto;
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 20759f8..dd43204 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -93,6 +93,7 @@ namespace {
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
+constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
constexpr std::string_view kInitMarkerFilename = "init_marker";
@@ -343,6 +344,14 @@ std::string MakeIndexDirectoryPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kIndexSubfolderName);
}
+// Returns the integer index working path under base_dir. Integer index derives
+// from PersistentStorage, which fully owns (creates/deletes) its working path.
+std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) {
+  std::string working_path =
+      absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName);
+  return working_path;
+}
+
// SchemaStore files are in a standalone subfolder for easier file management.
// We can delete and recreate the subfolder and not touch/affect anything
// else.
@@ -655,7 +664,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
// TODO(b/249829533): switch to use persistent numeric index after
// implementing and initialize numeric index.
- integer_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ TC3_ASSIGN_OR_RETURN(
+ integer_index_,
+ DummyNumericIndex<int64_t>::Create(
+ *filesystem_, MakeIntegerIndexWorkingPath(options_.base_dir())));
libtextclassifier3::Status index_init_status;
if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
@@ -1738,8 +1750,9 @@ SearchResultProto IcingSearchEngine::Search(
ScopedTimer overall_timer(clock_->GetNewTimer(), [query_stats](int64_t t) {
query_stats->set_latency_ms(t);
});
- // TODO(b/146008613) Explore ideas to make this function read-only.
- absl_ports::unique_lock l(&mutex_);
+ // Only an overall read-lock is required here. A finer-grained write-lock is
+ // provided around the LiteIndex.
+ absl_ports::shared_lock l(&mutex_);
query_stats->set_lock_acquisition_latency_ms(
overall_timer.timer().GetElapsedMilliseconds());
if (!initialized_) {
@@ -1768,9 +1781,40 @@ SearchResultProto IcingSearchEngine::Search(
query_stats->set_is_first_page(true);
query_stats->set_requested_page_size(result_spec.num_per_page());
- // Process query and score
- QueryScoringResults query_scoring_results =
- ProcessQueryAndScore(search_spec, scoring_spec, result_spec);
+ const JoinSpecProto& join_spec = search_spec.join_spec();
+ std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
+ if (!join_spec.parent_property_expression().empty() &&
+ !join_spec.child_property_expression().empty()) {
+ // Process child query
+ QueryScoringResults nested_query_scoring_results =
+ ProcessQueryAndScore(join_spec.nested_spec().search_spec(),
+ join_spec.nested_spec().scoring_spec(),
+ join_spec.nested_spec().result_spec(),
+ /*join_children_fetcher=*/nullptr);
+ // TODO(b/256022027): set different kinds of latency for 2nd query.
+ if (!nested_query_scoring_results.status.ok()) {
+ TransformStatus(nested_query_scoring_results.status, result_status);
+ return result_proto;
+ }
+
+ JoinProcessor join_processor(document_store_.get());
+ // Building a JoinChildrenFetcher where child documents are grouped by
+ // their joinable values.
+ libtextclassifier3::StatusOr<JoinChildrenFetcher> join_children_fetcher_or =
+ join_processor.GetChildrenFetcher(
+ search_spec.join_spec(),
+ std::move(nested_query_scoring_results.scored_document_hits));
+ if (!join_children_fetcher_or.ok()) {
+ TransformStatus(join_children_fetcher_or.status(), result_status);
+ return result_proto;
+ }
+ join_children_fetcher = std::make_unique<JoinChildrenFetcher>(
+ std::move(join_children_fetcher_or).ValueOrDie());
+ }
+
+ // Process parent query
+ QueryScoringResults query_scoring_results = ProcessQueryAndScore(
+ search_spec, scoring_spec, result_spec, join_children_fetcher.get());
int term_count = 0;
for (const auto& section_and_terms : query_scoring_results.query_terms) {
term_count += section_and_terms.second.size();
@@ -1793,26 +1837,13 @@ SearchResultProto IcingSearchEngine::Search(
}
std::unique_ptr<ScoredDocumentHitsRanker> ranker;
- const JoinSpecProto& join_spec = search_spec.join_spec();
- if (!join_spec.parent_property_expression().empty() &&
- !join_spec.child_property_expression().empty()) {
- // Process 2nd query
- QueryScoringResults nested_query_scoring_results =
- ProcessQueryAndScore(join_spec.nested_spec().search_spec(),
- join_spec.nested_spec().scoring_spec(),
- join_spec.nested_spec().result_spec());
- // TOOD(b/256022027): set different kinds of latency for 2nd query.
- if (!nested_query_scoring_results.status.ok()) {
- TransformStatus(nested_query_scoring_results.status, result_status);
- return result_proto;
- }
-
+ if (join_children_fetcher != nullptr) {
// Join 2 scored document hits
JoinProcessor join_processor(document_store_.get());
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
joined_result_document_hits_or = join_processor.Join(
join_spec, std::move(query_scoring_results.scored_document_hits),
- std::move(nested_query_scoring_results.scored_document_hits));
+ *join_children_fetcher);
if (!joined_result_document_hits_or.ok()) {
TransformStatus(joined_result_document_hits_or.status(), result_status);
return result_proto;
@@ -1896,7 +1927,8 @@ SearchResultProto IcingSearchEngine::Search(
IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec) {
+ const ResultSpecProto& result_spec,
+ const JoinChildrenFetcher* join_children_fetcher) {
std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
@@ -1934,8 +1966,9 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
component_timer = clock_->GetNewTimer();
// Scores but does not rank the results.
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
- scoring_processor_or = ScoringProcessor::Create(
- scoring_spec, document_store_.get(), schema_store_.get());
+ scoring_processor_or =
+ ScoringProcessor::Create(scoring_spec, document_store_.get(),
+ schema_store_.get(), join_children_fetcher);
if (!scoring_processor_or.ok()) {
return QueryScoringResults(std::move(scoring_processor_or).status(),
std::move(query_results.query_terms),
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 221d86c..446e081 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -29,6 +29,7 @@
#include "icing/index/index.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/jni/jni-cache.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/performance-configuration.h"
#include "icing/proto/debug.pb.h"
@@ -587,10 +588,11 @@ class IcingSearchEngine {
parse_query_latency_ms(parse_query_latency_ms_in),
scoring_latency_ms(scoring_latency_ms_in) {}
};
- QueryScoringResults ProcessQueryAndScore(const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec)
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ QueryScoringResults ProcessQueryAndScore(
+ const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const JoinChildrenFetcher* join_children_fetcher)
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
// Many of the internal components rely on other components' derived data.
// Check that everything is consistent with each other so that we're not
diff --git a/icing/icing-search-engine_delete_test.cc b/icing/icing-search-engine_delete_test.cc
new file mode 100644
index 0000000..c3b1ccd
--- /dev/null
+++ b/icing/icing-search-engine_delete_test.cc
@@ -0,0 +1,768 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Return;
+using ::testing::SizeIs;
+using ::testing::StrEq;
+using ::testing::UnorderedElementsAre;
+
+// Test-only subclass of IcingSearchEngine. For mocking purposes, it allows
+// tests to provide a custom Filesystem (along with an IcingFilesystem, Clock
+// and JniCache) by forwarding them to the IcingSearchEngine constructor.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ // Takes ownership of every dependency and moves it into the base class.
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+// Base directory, under the test temp dir, used as the Icing base_dir by the
+// tests in this file.
+std::string GetTestBaseDir() {
+  std::string base_dir = GetTestTempDir();
+  base_dir += "/icing";
+  return base_dir;
+}
+
+// Fixture for the tests that exercise the IcingSearchEngine::Delete* APIs.
+class IcingSearchEngineDeleteTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    // The ICU data file is only set up when neither CFString nor reverse-JNI
+    // tokenization is in use. If the reverse-JNI method is used for
+    // segmentation (i.e. not ICU), the ICU data file isn't included.
+    // Technically, reverse-JNI segmentation could be combined with an ICU data
+    // file, but that seems unlikely and the current BUILD setup doesn't do
+    // this.
+    const bool needs_icu_data =
+        !IsCfStringTokenization() && !IsReverseJniTokenization();
+    if (needs_icu_data) {
+      // File generated via icu_data_file rule in //icing/BUILD.
+      std::string icu_path = GetTestFilePath("icing/icu.dat");
+      ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(icu_path));
+    }
+
+    const std::string base_dir = GetTestBaseDir();
+    filesystem_.CreateDirectoryRecursively(base_dir.c_str());
+  }
+
+  void TearDown() override {
+    const std::string base_dir = GetTestBaseDir();
+    filesystem_.DeleteDirectoryRecursively(base_dir.c_str());
+  }
+
+  const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+  Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+// Options shared by the tests in this file: only base_dir is set, pointing at
+// the test base directory.
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+  IcingSearchEngineOptions options;
+  const std::string base_dir = GetTestBaseDir();
+  options.set_base_dir(base_dir);
+  return options;
+}
+
+// Builds a schema containing a single "Message" type with one required string
+// property "body", configured with TERM_MATCH_PREFIX and TOKENIZER_PLAIN.
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+// Builds a schema containing a single "Email" type with two required string
+// properties, "body" and "subject", both configured with TERM_MATCH_PREFIX and
+// TOKENIZER_PLAIN.
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+// Scoring spec shared by the tests in this file: ranks by DOCUMENT_SCORE.
+ScoringSpecProto GetDefaultScoringSpec() {
+  ScoringSpecProto spec;
+  spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+  return spec;
+}
+
+// Verifies that DeleteBySchemaType removes only the documents of the given
+// schema type, reports the expected delete stats, and leaves documents of
+// other types retrievable and searchable.
+TEST_F(IcingSearchEngineDeleteTest, DeleteBySchemaType) {
+ SchemaProto schema;
+ // Add an email type
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ // Add a message type
+ type = schema.add_types();
+ type->set_schema_type("message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ // document1 is a "message"; document2 is an "email" whose subject also
+ // contains the term "message", so it matches the search at the end.
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Fixed timer value; the latency expected in the delete stats below is 7.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Both documents should be retrievable before the delete.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete the first type. The first doc should be irretrievable. The
+ // second should still be present.
+ DeleteBySchemaTypeResultProto result_proto =
+ icing.DeleteBySchemaType("message");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Search for "message", only document2 should show up.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("message");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+// Verifies that DeleteByQuery with only a schema type filter removes the
+// documents of that type and leaves documents of other types retrievable and
+// searchable.
+TEST_F(IcingSearchEngineDeleteTest, DeleteSchemaTypeByQuery) {
+  SchemaProto schema = CreateMessageSchema();
+  // Add an email type
+  SchemaProto tmp = CreateEmailSchema();
+  *schema.add_types() = tmp.types(0);
+
+  // document1 uses the first (message) type, document2 the second (email)
+  // type. Both contain the term "message".
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri1")
+          .SetSchema(schema.types(0).schema_type())
+          .AddStringProperty("body", "message body1")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri2")
+          .SetSchema(schema.types(1).schema_type())
+          .AddStringProperty("subject", "subject subject2")
+          .AddStringProperty("body", "message body2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  // Setup must succeed for the rest of the test to be meaningful, so these
+  // are fatal assertions.
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Both documents should be retrievable before the delete.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() = document1;
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  *expected_get_result_proto.mutable_document() = document2;
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  // Delete the first type. The first doc should be irretrievable. The
+  // second should still be present.
+  SearchSpecProto search_spec;
+  search_spec.add_schema_type_filters(schema.types(0).schema_type());
+  EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+  expected_get_result_proto.mutable_status()->set_message(
+      "Document (namespace1, uri1) not found.");
+  expected_get_result_proto.clear_document();
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  expected_get_result_proto.mutable_status()->clear_message();
+  *expected_get_result_proto.mutable_document() = document2;
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  // Search for "message", only document2 should show up.
+  search_spec = SearchSpecProto::default_instance();
+  search_spec.set_query("message");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      document2;
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Verifies that DeleteByNamespace removes every document in the given
+// namespace, reports the expected delete stats, and leaves documents in other
+// namespaces retrievable and searchable.
+TEST_F(IcingSearchEngineDeleteTest, DeleteByNamespace) {
+ // document1 and document2 live in "namespace1"; document3 in "namespace3".
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Fixed timer value; the latency expected in the delete stats below is 7.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // All three documents should be retrievable before the delete.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Delete namespace1. Document1 and document2 should be irretrievable.
+ // Document3 should still be present.
+ DeleteByNamespaceResultProto result_proto =
+ icing.DeleteByNamespace("namespace1");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(2);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri1) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace1, uri2) not found.");
+ expected_get_result_proto.clear_document();
+ EXPECT_THAT(
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Search for "message", only document3 should show up.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("message");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+// Verifies that DeleteByQuery with only a namespace filter removes the
+// documents in that namespace and leaves documents in other namespaces
+// retrievable and searchable.
+TEST_F(IcingSearchEngineDeleteTest, DeleteNamespaceByQuery) {
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message body1")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message body2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  // Setup must succeed for the rest of the test to be meaningful, so these
+  // are fatal assertions.
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Both documents should be retrievable before the delete.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() = document1;
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  *expected_get_result_proto.mutable_document() = document2;
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  // Delete the first namespace. The first doc should be irretrievable. The
+  // second should still be present.
+  SearchSpecProto search_spec;
+  search_spec.add_namespace_filters("namespace1");
+  EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+  expected_get_result_proto.mutable_status()->set_message(
+      "Document (namespace1, uri1) not found.");
+  expected_get_result_proto.clear_document();
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  expected_get_result_proto.mutable_status()->clear_message();
+  *expected_get_result_proto.mutable_document() = document2;
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  // Search for "message", only document2 should show up.
+  search_spec = SearchSpecProto::default_instance();
+  search_spec.set_query("message");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      document2;
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Verifies that DeleteByQuery with a term query removes only the matching
+// documents, reports the expected delete-by-query stats, and leaves the other
+// documents retrievable and searchable.
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQuery) {
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message body1")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message body2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  // Fixed timer value; every latency expected in the stats below is 7.
+  auto fake_clock = std::make_unique<FakeClock>();
+  fake_clock->SetTimerElapsedMilliseconds(7);
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::make_unique<Filesystem>(),
+                              std::make_unique<IcingFilesystem>(),
+                              std::move(fake_clock), GetTestJniCache());
+  // Setup must succeed for the rest of the test to be meaningful, so these
+  // are fatal assertions.
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Both documents should be retrievable before the delete.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() = document1;
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  *expected_get_result_proto.mutable_document() = document2;
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  // Delete all docs containing 'body1'. The first doc should be irretrievable.
+  // The second should still be present.
+  SearchSpecProto search_spec;
+  search_spec.set_query("body1");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
+  EXPECT_THAT(result_proto.status(), ProtoIsOk());
+  DeleteByQueryStatsProto exp_stats;
+  exp_stats.set_latency_ms(7);
+  exp_stats.set_num_documents_deleted(1);
+  exp_stats.set_query_length(search_spec.query().length());
+  exp_stats.set_num_terms(1);
+  exp_stats.set_num_namespaces_filtered(0);
+  exp_stats.set_num_schema_types_filtered(0);
+  exp_stats.set_parse_query_latency_ms(7);
+  exp_stats.set_document_removal_latency_ms(7);
+  EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
+
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+  expected_get_result_proto.mutable_status()->set_message(
+      "Document (namespace1, uri1) not found.");
+  expected_get_result_proto.clear_document();
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  expected_get_result_proto.mutable_status()->clear_message();
+  *expected_get_result_proto.mutable_document() = document2;
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
+
+  // Search for "message", only document2 should show up.
+  search_spec = SearchSpecProto::default_instance();
+  search_spec.set_query("message");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      document2;
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Verifies that DeleteByQuery reports which documents it deleted, grouped by
+// namespace and schema type, in addition to the usual stats.
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryReturnInfo) {
+  // Builds a "Message" document whose only property is the given body text.
+  auto make_message = [](const std::string& name_space, const std::string& uri,
+                         const std::string& body) {
+    return DocumentBuilder()
+        .SetKey(name_space, uri)
+        .SetSchema("Message")
+        .AddStringProperty("body", body)
+        .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+        .Build();
+  };
+  // One document in namespace1 and two in namespace2, so the deletion info
+  // has to be split into two groups.
+  DocumentProto document1 =
+      make_message("namespace1", "uri1", "message body1");
+  DocumentProto document2 =
+      make_message("namespace2", "uri2", "message body2");
+  DocumentProto document3 =
+      make_message("namespace2", "uri3", "message body3");
+
+  // The fake clock is set to 7ms of elapsed timer time; every latency field
+  // expected below is 7.
+  auto clock = std::make_unique<FakeClock>();
+  clock->SetTimerElapsedMilliseconds(7);
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::make_unique<Filesystem>(),
+                              std::make_unique<IcingFilesystem>(),
+                              std::move(clock), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+  // Expects Get(name_space, uri) to succeed and return exactly `document`.
+  auto expect_get_returns = [&icing](const std::string& name_space,
+                                     const std::string& uri,
+                                     const DocumentProto& document) {
+    GetResultProto expected;
+    expected.mutable_status()->set_code(StatusProto::OK);
+    *expected.mutable_document() = document;
+    EXPECT_THAT(
+        icing.Get(name_space, uri, GetResultSpecProto::default_instance()),
+        EqualsProto(expected));
+  };
+  expect_get_returns("namespace1", "uri1", document1);
+  expect_get_returns("namespace2", "uri2", document2);
+  expect_get_returns("namespace2", "uri3", document3);
+
+  // Delete all docs to test the information is correctly grouped.
+  // NOTE(review): the second argument presumably requests the per-document
+  // deletion info checked further down; confirm against the declaration.
+  SearchSpecProto delete_spec;
+  delete_spec.set_query("message");
+  delete_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  DeleteByQueryResultProto delete_result =
+      icing.DeleteByQuery(delete_spec, true);
+  EXPECT_THAT(delete_result.status(), ProtoIsOk());
+
+  DeleteByQueryStatsProto expected_stats;
+  expected_stats.set_latency_ms(7);
+  expected_stats.set_num_documents_deleted(3);
+  expected_stats.set_query_length(delete_spec.query().length());
+  expected_stats.set_num_terms(1);
+  expected_stats.set_num_namespaces_filtered(0);
+  expected_stats.set_num_schema_types_filtered(0);
+  expected_stats.set_parse_query_latency_ms(7);
+  expected_stats.set_document_removal_latency_ms(7);
+  EXPECT_THAT(delete_result.delete_by_query_stats(),
+              EqualsProto(expected_stats));
+
+  // Check that DeleteByQuery can return information for deleted documents.
+  DeleteByQueryResultProto::DocumentGroupInfo namespace1_group;
+  namespace1_group.set_namespace_("namespace1");
+  namespace1_group.set_schema("Message");
+  namespace1_group.add_uris("uri1");
+  DeleteByQueryResultProto::DocumentGroupInfo namespace2_group;
+  namespace2_group.set_namespace_("namespace2");
+  namespace2_group.set_schema("Message");
+  namespace2_group.add_uris("uri3");
+  namespace2_group.add_uris("uri2");
+  EXPECT_THAT(delete_result.deleted_documents(),
+              UnorderedElementsAre(EqualsProto(namespace1_group),
+                                   EqualsProto(namespace2_group)));
+
+  // None of the three documents may be retrievable any more.
+  auto expect_not_found = [&icing](const std::string& name_space,
+                                   const std::string& uri) {
+    EXPECT_THAT(
+        icing.Get(name_space, uri, GetResultSpecProto::default_instance())
+            .status()
+            .code(),
+        Eq(StatusProto::NOT_FOUND));
+  };
+  expect_not_found("namespace1", "uri1");
+  expect_not_found("namespace2", "uri2");
+  expect_not_found("namespace2", "uri3");
+}
+
+// Verifies that a DeleteByQuery matching nothing returns NOT_FOUND and leaves
+// every stored document retrievable and searchable.
+TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryNotFound) {
+  // Builds a "Message" document whose only property is the given body text.
+  auto make_message = [](const std::string& name_space, const std::string& uri,
+                         const std::string& body) {
+    return DocumentBuilder()
+        .SetKey(name_space, uri)
+        .SetSchema("Message")
+        .AddStringProperty("body", body)
+        .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+        .Build();
+  };
+  DocumentProto document1 =
+      make_message("namespace1", "uri1", "message body1");
+  DocumentProto document2 =
+      make_message("namespace2", "uri2", "message body2");
+
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+  EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // Expects Get(name_space, uri) to succeed and return exactly `document`.
+  auto expect_get_returns = [&icing](const std::string& name_space,
+                                     const std::string& uri,
+                                     const DocumentProto& document) {
+    GetResultProto expected;
+    expected.mutable_status()->set_code(StatusProto::OK);
+    *expected.mutable_document() = document;
+    EXPECT_THAT(
+        icing.Get(name_space, uri, GetResultSpecProto::default_instance()),
+        EqualsProto(expected));
+  };
+  expect_get_returns("namespace1", "uri1", document1);
+  expect_get_returns("namespace2", "uri2", document2);
+
+  // Delete all docs containing 'foo', which should be none of them. Both docs
+  // should still be present.
+  SearchSpecProto delete_spec;
+  delete_spec.set_query("foo");
+  delete_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(icing.DeleteByQuery(delete_spec).status(),
+              ProtoStatusIs(StatusProto::NOT_FOUND));
+
+  expect_get_returns("namespace1", "uri1", document1);
+  expect_get_returns("namespace2", "uri2", document2);
+
+  // Both documents must also still be reachable through the index.
+  SearchSpecProto search_spec;
+  search_spec.set_query("message");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+  SearchResultProto expected_search_result;
+  expected_search_result.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result.mutable_results()->Add()->mutable_document() =
+      document2;
+  *expected_search_result.mutable_results()->Add()->mutable_document() =
+      document1;
+  SearchResultProto actual_search_result =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(actual_search_result,
+              EqualsSearchResultIgnoreStatsAndScores(expected_search_result));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
new file mode 100644
index 0000000..6ff21fb
--- /dev/null
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -0,0 +1,1920 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::_;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Return;
+using ::testing::SizeIs;
+
+// Long "lorem ipsum" filler text.
+// NOTE(review): presumably used by tests that need sizeable string content;
+// confirm against the tests that reference it.
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
+// Reads sizeof(Header) bytes from offset 0 of `file_path` into a
+// default-constructed document log header and returns it.
+//
+// `filesystem` is taken by const reference so that the polymorphic Filesystem
+// is neither copied nor sliced. The result of PRead is not checked here.
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+    const Filesystem& filesystem, const std::string& file_path) {
+  using Header = PortableFileBackedProtoLog<DocumentWrapper>::Header;
+  Header header;
+  filesystem.PRead(file_path.c_str(), &header, sizeof(Header),
+                   /*offset=*/0);
+  return header;
+}
+
+// Writes the sizeof(Header) bytes of `header` to `file_path` via
+// Filesystem::Write.
+//
+// Both `filesystem` and `header` are only read, so they are taken by const
+// reference; this also avoids copying/slicing the polymorphic Filesystem. The
+// result of Write is not checked here.
+void WriteDocumentLogHeader(
+    const Filesystem& filesystem, const std::string& file_path,
+    const PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+  using Header = PortableFileBackedProtoLog<DocumentWrapper>::Header;
+  filesystem.Write(file_path.c_str(), &header, sizeof(Header));
+}
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+//
+// Thin subclass whose only member is a public constructor that forwards the
+// options, both filesystems, the clock and the JNI cache to the matching
+// IcingSearchEngine constructor, so tests can inject mocks and fakes.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+// Returns the base directory used by these tests: "<test temp dir>/icing".
+std::string GetTestBaseDir() {
+  std::string base_dir = GetTestTempDir();
+  base_dir += "/icing";
+  return base_dir;
+}
+
+// This test is meant to cover all tests relating to
+// IcingSearchEngine::Initialize.
+//
+// The fixture creates the test base directory before each test and removes it
+// afterwards, so every test starts from an empty directory.
+class IcingSearchEngineInitializationTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+      // If we've specified using the reverse-JNI method for segmentation (i.e.
+      // not ICU), then we won't have the ICU data file included to set up.
+      // Technically, we could choose to use reverse-JNI for segmentation AND
+      // include an ICU data file, but that seems unlikely and our current BUILD
+      // setup doesn't do this.
+      // File generated via icu_data_file rule in //icing/BUILD.
+      std::string icu_data_file_path = GetTestFilePath("icing/icu.dat");
+      ICING_ASSERT_OK(
+          icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+    }
+    // Fail the test right away if the base directory cannot be created,
+    // instead of letting later engine calls fail in less obvious ways.
+    ASSERT_TRUE(
+        filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()));
+  }
+
+  void TearDown() override {
+    // Best-effort cleanup; the result is intentionally not checked.
+    filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+  }
+
+  // Real (non-mock) filesystem that tests can use to inspect or modify the
+  // files on disk.
+  const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+  Filesystem filesystem_;
+};
+
+// Creation timestamp (in milliseconds) given to test documents. Non-zero
+// value so we don't override it to be the current time.
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+// Returns "<test base dir>/document_dir".
+std::string GetDocumentDir() {
+  std::string document_dir = GetTestBaseDir();
+  document_dir += "/document_dir";
+  return document_dir;
+}
+
+// Returns "<test base dir>/index_dir".
+std::string GetIndexDir() {
+  std::string index_dir = GetTestBaseDir();
+  index_dir += "/index_dir";
+  return index_dir;
+}
+
+// Returns "<test base dir>/schema_dir".
+std::string GetSchemaDir() {
+  std::string schema_dir = GetTestBaseDir();
+  schema_dir += "/schema_dir";
+  return schema_dir;
+}
+
+// Returns "<test base dir>/icing_search_engine_header".
+std::string GetHeaderFilename() {
+  std::string header_filename = GetTestBaseDir();
+  header_filename += "/icing_search_engine_header";
+  return header_filename;
+}
+
+// Returns engine options whose only explicitly set field is the base
+// directory, which points at the test base directory.
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+  IcingSearchEngineOptions options;
+  const std::string base_dir = GetTestBaseDir();
+  options.set_base_dir(base_dir);
+  return options;
+}
+
+// Builds a "Message" document keyed by (name_space, uri) with the fixed body
+// "message body" and the default creation timestamp.
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+  DocumentProto message =
+      DocumentBuilder()
+          .SetKey(std::move(name_space), std::move(uri))
+          .SetSchema("Message")
+          .AddStringProperty("body", "message body")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  return message;
+}
+
+// Builds an "Email" document keyed by (name_space, uri) with the given score,
+// subject and body. No creation timestamp is set here.
+DocumentProto CreateEmailDocument(const std::string& name_space,
+                                  const std::string& uri, int score,
+                                  const std::string& subject_content,
+                                  const std::string& body_content) {
+  DocumentProto email = DocumentBuilder()
+                            .SetKey(name_space, uri)
+                            .SetSchema("Email")
+                            .SetScore(score)
+                            .AddStringProperty("subject", subject_content)
+                            .AddStringProperty("body", body_content)
+                            .Build();
+  return email;
+}
+
+// Returns a schema with a single type "Message" that has one required string
+// property "body", configured with TERM_MATCH_PREFIX and TOKENIZER_PLAIN.
+SchemaProto CreateMessageSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+// Returns a schema with a single type "Email" that has two required string
+// properties, "body" and "subject", both configured with TERM_MATCH_PREFIX
+// and TOKENIZER_PLAIN.
+SchemaProto CreateEmailSchema() {
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+// Returns a scoring spec that ranks results by DOCUMENT_SCORE.
+ScoringSpecProto GetDefaultScoringSpec() {
+  ScoringSpecProto spec;
+  spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+  return spec;
+}
+
+// Smoke test: a freshly initialized engine accepts a schema and the same
+// document put twice (once as an lvalue, once as a temporary copy).
+TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto message = CreateMessageDocument("namespace", "uri");
+  ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(DocumentProto(message)).status(), ProtoIsOk());
+}
+
+// Calling Initialize() a second time on the same engine instance must not
+// lose a document that was put before the re-initialization.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializingAgainSavesNonPersistedData) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto message = CreateMessageDocument("namespace", "uri");
+  ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+  GetResultProto expected_get_result;
+  expected_get_result.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result.mutable_document() = message;
+
+  // The document is retrievable before re-initializing...
+  ASSERT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result));
+
+  // ...and still retrievable afterwards.
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result));
+}
+
+// An index merge size of INT32_MAX is rejected at initialization.
+TEST_F(IcingSearchEngineInitializationTest,
+       MaxIndexMergeSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+  icing_options.set_index_merge_size(std::numeric_limits<int32_t>::max());
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// A negative index merge size is rejected at initialization.
+TEST_F(IcingSearchEngineInitializationTest,
+       NegativeMergeSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+  icing_options.set_index_merge_size(-1);
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// An index merge size of zero is rejected at initialization.
+TEST_F(IcingSearchEngineInitializationTest,
+       ZeroMergeSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+  icing_options.set_index_merge_size(0);
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// The smallest positive index merge size is accepted at initialization.
+TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) {
+  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+  // One is fine, if a bit weird. It just means that the lite index will be
+  // smaller and will request a merge any time content is added to it.
+  icing_options.set_index_merge_size(1);
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoIsOk());
+}
+
+// A negative max token length is rejected at initialization.
+TEST_F(IcingSearchEngineInitializationTest,
+       NegativeMaxTokenLenReturnsInvalidArgument) {
+  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+  icing_options.set_max_token_length(-1);
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// A max token length of zero is rejected at initialization.
+TEST_F(IcingSearchEngineInitializationTest,
+       ZeroMaxTokenLenReturnsInvalidArgument) {
+  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+  icing_options.set_max_token_length(0);
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// When the filesystem refuses to create directories, Initialize() reports an
+// INTERNAL error whose message mentions the directory creation failure.
+TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) {
+  auto failing_filesystem = std::make_unique<MockFilesystem>();
+  // This fails DocumentStore::Create()
+  ON_CALL(*failing_filesystem, CreateDirectoryRecursively(_))
+      .WillByDefault(Return(false));
+
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::move(failing_filesystem),
+                              std::make_unique<IcingFilesystem>(),
+                              std::make_unique<FakeClock>(), GetTestJniCache());
+
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+  EXPECT_THAT(init_result.status().message(),
+              HasSubstr("Could not create directory"));
+}
+
+// With an init marker file recording 5 previous failed attempts,
+// initialization still succeeds with status OK, reports 5 previous failures,
+// keeps both stored documents and removes the marker file.
+TEST_F(IcingSearchEngineInitializationTest,
+ InitMarkerFilePreviousFailuresAtThreshold) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ // Explicit timestamps so the documents read back later compare equal to
+ // these protos.
+ email1.set_creation_timestamp_ms(10000);
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+ email2.set_creation_timestamp_ms(10000);
+
+ {
+ // Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ // Write an init marker file with 5 previously failed attempts.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+
+ {
+ ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
+ // The attempt count is written at offset 0, converted with
+ // GHostToNetworkL.
+ int network_init_attempts = GHostToNetworkL(5);
+ // Write the updated number of attempts before we get started.
+ ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)));
+ ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
+ }
+
+ {
+ // Create the index again and verify that initialization succeeds and no
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(5));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(email1));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(email2));
+ }
+
+ // The successful init should have thrown out the marker file.
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+// With an init marker file recording 6 previous failed attempts,
+// initialization returns WARNING_DATA_LOSS, reports 6 previous failures,
+// no longer finds the previously stored documents and removes the marker
+// file.
+TEST_F(IcingSearchEngineInitializationTest,
+ InitMarkerFilePreviousFailuresBeyondThreshold) {
+ Filesystem filesystem;
+ DocumentProto email1 =
+ CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+ DocumentProto email2 =
+ CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+
+ {
+ // Create an index with a few documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(), ProtoIsOk());
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(0));
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ }
+
+ // Write an init marker file with 6 previously failed attempts.
+ std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+
+ {
+ ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
+ // The attempt count is written at offset 0, converted with
+ // GHostToNetworkL.
+ int network_init_attempts = GHostToNetworkL(6);
+ // Write the updated number of attempts before we get started.
+ ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
+ &network_init_attempts,
+ sizeof(network_init_attempts)));
+ ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
+ }
+
+ {
+ // Create the index again and verify that initialization succeeds and all
+ // data is thrown out.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ ASSERT_THAT(init_result.status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+ Eq(6));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+
+ // The successful init should have thrown out the marker file.
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+// Each failed Initialize() must bump the count of previous init failures by
+// one. After six failures in a row, the next initialization (with a working
+// filesystem) returns WARNING_DATA_LOSS, no longer finds the stored documents
+// and removes the marker file.
+TEST_F(IcingSearchEngineInitializationTest,
+       SuccessiveInitFailuresIncrementsInitMarker) {
+  Filesystem filesystem;
+  DocumentProto email1 =
+      CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
+  DocumentProto email2 =
+      CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
+
+  {
+    // 1. Create an index with a few documents.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    ASSERT_THAT(init_result.status(), ProtoIsOk());
+    ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+                Eq(0));
+    ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+  }
+
+  {
+    // 2. Create an index that will encounter an IO failure when trying to
+    // create the document log.
+    IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+    auto mock_filesystem = std::make_unique<MockFilesystem>();
+    std::string document_log_filepath =
+        icing_options.base_dir() + "/document_dir/document_log_v1";
+    // Reports a bad file size for any path that starts with the document log
+    // path and defers to the real filesystem for everything else. The lambda
+    // captures `document_log_filepath` by reference; that string is declared
+    // before the engine below and therefore outlives it.
+    auto get_filesize_lambda = [this,
+                                &document_log_filepath](const char* filename) {
+      if (std::strncmp(document_log_filepath.c_str(), filename,
+                       document_log_filepath.length()) == 0) {
+        return Filesystem::kBadFileSize;
+      }
+      return this->filesystem()->GetFileSize(filename);
+    };
+    // `A` is spelled with its namespace because this file has no
+    // using-declaration for it.
+    ON_CALL(*mock_filesystem, GetFileSize(::testing::A<const char*>()))
+        .WillByDefault(get_filesize_lambda);
+
+    TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+                                std::make_unique<IcingFilesystem>(),
+                                std::make_unique<FakeClock>(),
+                                GetTestJniCache());
+
+    // Fail to initialize six times in a row. Before the k-th attempt
+    // (counting from zero) exactly k earlier failures must have been
+    // recorded.
+    constexpr int kNumFailedInits = 6;
+    for (int previous_failures = 0; previous_failures < kNumFailedInits;
+         ++previous_failures) {
+      InitializeResultProto init_result = icing.Initialize();
+      ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
+      ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+                  Eq(previous_failures));
+    }
+  }
+
+  {
+    // 3. Create the index again and verify that initialization succeeds and all
+    // data is thrown out.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    ASSERT_THAT(init_result.status(),
+                ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+    ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
+                Eq(6));
+
+    EXPECT_THAT(
+        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
+            .status(),
+        ProtoStatusIs(StatusProto::NOT_FOUND));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+            .status(),
+        ProtoStatusIs(StatusProto::NOT_FOUND));
+  }
+
+  // The successful init should have thrown out the marker file.
+  std::string marker_filepath = GetTestBaseDir() + "/init_marker";
+  ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
+}
+
+// Deleting the engine's header file between two engine instances must not
+// lose data: the second instance initializes with OK and can still Get,
+// Search and Put the same document.
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Remove the header file while no engine instance is alive.
+ EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
+
+ // We should be able to recover from this and access all our previous data
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Checks that DocumentLog is still ok
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Checks that the index is still ok so we can search over it
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Checks that the Schema is still ok, since it'll be needed to validate the
+ // document
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+}
+
+// Overwriting the schema file with garbage between two engine instances makes
+// the second instance's Initialize() fail with INTERNAL.
+TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Write the 4 bytes "1234" to <schema dir>/schema.pb while no engine
+ // instance is alive.
+ const std::string schema_file =
+ absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
+ corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+}
+
+// Overwriting the document log file with garbage between two engine instances
+// makes the second instance's Initialize() fail with INTERNAL.
+TEST_F(IcingSearchEngineInitializationTest,
+ UnableToRecoverFromCorruptDocumentLog) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Write the 4 bytes "1234" to the document log file (its name comes from
+ // DocumentLogCreator) while no engine instance is alive.
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
+ corrupt_data.data(), corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+}
+
+// Verifies that Initialize() succeeds after the schema on disk was changed
+// behind the engine's back (directly through SchemaStore, with a
+// "set_schema_marker" file left in the base directory), and that afterwards
+// both the new "Email" type and the newly indexed "additional" section of
+// "Message" are usable.
+TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromInconsistentSchemaStore) {
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2_with_additional_property =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("additional", "content")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ {
+ // Initializes folder and schema
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Original schema: a single "Message" type whose "body" is indexed and
+ // whose "additional" property has no string indexing config.
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ property = type->add_properties();
+ property->set_property_name("additional");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
+ ProtoIsOk());
+
+ // Won't get us anything because "additional" isn't marked as an indexed
+ // property in the schema
+ SearchSpecProto search_spec;
+ search_spec.set_query("additional:content");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ // Expected result carries only an OK status and no documents.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ // This schema will change the SchemaTypeIds from the previous schema
+ // (since SchemaTypeIds are assigned based on order of the types, and this
+ // new schema changes the ordering of previous types)
+ SchemaProto new_schema;
+ auto type = new_schema.add_types();
+ type->set_schema_type("Email");
+
+ type = new_schema.add_types();
+ type->set_schema_type("Message");
+
+ // Adding a new property changes the SectionIds (since SectionIds are
+ // assigned based on alphabetical order of indexed sections, marking
+ // "additional" as an indexed property will push the "body" property to a
+ // different SectionId)
+ auto property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ property = type->add_properties();
+ property->set_property_name("additional");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Write the marker file
+ // NOTE(review): presumably Initialize() treats this file as evidence of an
+ // interrupted SetSchema and re-derives dependent state - confirm against
+ // IcingSearchEngine.
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
+ // This goes straight through SchemaStore, bypassing IcingSearchEngine.
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
+ } // Will persist new schema
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // We can insert an Email document since we kept the new schema
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document;
+
+ EXPECT_THAT(icing.Get("namespace", "email_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ SearchSpecProto search_spec;
+
+ // The section restrict will ensure we are using the correct, updated
+ // SectionId in the Index
+ search_spec.set_query("additional:content");
+
+ // Schema type filter will ensure we're using the correct, updated
+ // SchemaTypeId in the DocumentStore
+ search_spec.add_schema_type_filters("Message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ // Only the document that has an "additional" property is expected back.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2_with_additional_property;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+// Verifies that a document written directly to the DocumentStore (and so never
+// indexed by the engine) is both retrievable and searchable after the next
+// Initialize().
+TEST_F(IcingSearchEngineInitializationTest,
+       RecoverFromInconsistentDocumentStore) {
+  DocumentProto indexed_doc = CreateMessageDocument("namespace", "uri1");
+  DocumentProto unindexed_doc = CreateMessageDocument("namespace", "uri2");
+
+  // Step 1: normal engine usage - set the schema and put the first document.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    EXPECT_THAT(engine.Initialize().status(), ProtoIsOk());
+    EXPECT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    EXPECT_THAT(engine.Put(indexed_doc).status(), ProtoIsOk());
+  }  // Leaving the scope should shut the engine down and persist its state.
+
+  // Step 2: bypass the engine and add the second document straight into the
+  // DocumentStore, so that it is stored but not indexed.
+  {
+    FakeClock clock;
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> schemas,
+        SchemaStore::Create(filesystem(), GetSchemaDir(), &clock));
+    ICING_EXPECT_OK(schemas->SetSchema(CreateMessageSchema()));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        DocumentStore::CreateResult doc_store_result,
+        DocumentStore::Create(filesystem(), GetDocumentDir(), &clock,
+                              schemas.get()));
+    std::unique_ptr<DocumentStore> doc_store =
+        std::move(doc_store_result.document_store);
+
+    ICING_EXPECT_OK(doc_store->Put(unindexed_doc));
+  }
+
+  // Step 3: re-open the engine. Index restoration is expected to run during
+  // Initialize() and pick up the second document.
+  IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+  // Both documents must be retrievable by key.
+  GetResultProto want_first;
+  want_first.mutable_status()->set_code(StatusProto::OK);
+  *want_first.mutable_document() = indexed_doc;
+  EXPECT_THAT(
+      engine.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(want_first));
+
+  GetResultProto want_second;
+  want_second.mutable_status()->set_code(StatusProto::OK);
+  *want_second.mutable_document() = unindexed_doc;
+  EXPECT_THAT(
+      engine.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(want_second));
+
+  // Both documents must be searchable; the second document is expected first.
+  SearchSpecProto query;
+  query.set_term_match_type(TermMatchType::EXACT_ONLY);
+  query.set_query("message");
+
+  SearchResultProto want_search;
+  want_search.mutable_status()->set_code(StatusProto::OK);
+  *want_search.mutable_results()->Add()->mutable_document() = unindexed_doc;
+  *want_search.mutable_results()->Add()->mutable_document() = indexed_doc;
+
+  SearchResultProto got_search = engine.Search(
+      query, GetDefaultScoringSpec(), ResultSpecProto::default_instance());
+  EXPECT_THAT(got_search, EqualsSearchResultIgnoreStatsAndScores(want_search));
+}
+
+// Verifies that when the on-disk index is lost while the document store is
+// intact, Initialize() succeeds and the previously stored document is
+// searchable again.
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentIndex) {
+  SearchSpecProto search_spec;
+  search_spec.set_query("message");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      CreateMessageDocument("namespace", "uri");
+
+  {
+    // Initializes folder and schema, index one document
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+    EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+                ProtoIsOk());
+    SearchResultProto search_result_proto =
+        icing.Search(search_spec, GetDefaultScoringSpec(),
+                     ResultSpecProto::default_instance());
+    EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                         expected_search_result_proto));
+  }  // This should shut down IcingSearchEngine and persist anything it needs to
+
+  // Pretend we lost the entire index. The index files live under
+  // "<index dir>/idx" (e.g. "idx/lite.hb"), so the whole "idx" directory is
+  // removed. The previous path "idx/lite." is only a file-name prefix, not a
+  // directory, so a recursive directory delete on it would not remove any
+  // index data. This is a setup precondition, hence ASSERT rather than EXPECT.
+  ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
+      absl_ports::StrCat(GetIndexDir(), "/idx").c_str()));
+
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // Check that our index is ok by searching over the restored index
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Verifies that after the bytes "1234" are written into the lite index hit
+// buffer file, Initialize() still succeeds and the stored document is
+// searchable.
+TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
+  SearchSpecProto query;
+  query.set_term_match_type(TermMatchType::EXACT_ONLY);
+  query.set_query("message");
+
+  // The same single-document result is expected before and after corruption.
+  SearchResultProto want;
+  *want.mutable_results()->Add()->mutable_document() =
+      CreateMessageDocument("namespace", "uri");
+  want.mutable_status()->set_code(StatusProto::OK);
+
+  // Step 1: set the schema, index one document and confirm it is searchable.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    EXPECT_THAT(engine.Initialize().status(), ProtoIsOk());
+    EXPECT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    EXPECT_THAT(engine.Put(CreateMessageDocument("namespace", "uri")).status(),
+                ProtoIsOk());
+    SearchResultProto before = engine.Search(
+        query, GetDefaultScoringSpec(), ResultSpecProto::default_instance());
+    EXPECT_THAT(before, EqualsSearchResultIgnoreStatsAndScores(want));
+  }  // Leaving the scope should shut the engine down and persist its state.
+
+  // Step 2: pretend the index is corrupted by writing junk into the hit buffer
+  // file.
+  const std::string hit_buffer_path = GetIndexDir() + "/idx/lite.hb";
+  ScopedFd hit_buffer_fd(filesystem()->OpenForWrite(hit_buffer_path.c_str()));
+  ASSERT_TRUE(hit_buffer_fd.is_valid());
+  ASSERT_TRUE(filesystem()->Write(hit_buffer_fd.get(), "1234", 4));
+
+  // Step 3: re-open and verify the restored index by searching over it.
+  IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+  SearchResultProto after = engine.Search(query, GetDefaultScoringSpec(),
+                                          ResultSpecProto::default_instance());
+  EXPECT_THAT(after, EqualsSearchResultIgnoreStatsAndScores(want));
+}
+
+// Verifies that every public operation invoked on an engine that was never
+// initialized returns FAILED_PRECONDITION (or, for InvalidateNextPageToken,
+// simply does not crash).
+TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+  // Schema operations. The schema comes from CreateMessageSchema(), so the
+  // local is named after what it actually holds.
+  SchemaProto message_schema = CreateMessageSchema();
+  EXPECT_THAT(icing.SetSchema(message_schema).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(icing.GetSchema().status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(
+      icing.GetSchemaType(message_schema.types(0).schema_type()).status(),
+      ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+  // Document operations.
+  DocumentProto doc = CreateMessageDocument("namespace", "uri");
+  EXPECT_THAT(icing.Put(doc).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(icing
+                  .Get(doc.namespace_(), doc.uri(),
+                       GetResultSpecProto::default_instance())
+                  .status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  // Uses the same status matcher as the other checks instead of comparing the
+  // raw status code.
+  EXPECT_THAT(
+      icing.DeleteBySchemaType(message_schema.types(0).schema_type()).status(),
+      ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+  // Search and pagination operations.
+  SearchSpecProto search_spec = SearchSpecProto::default_instance();
+  ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
+  ResultSpecProto result_spec = ResultSpecProto::default_instance();
+  EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  constexpr int kSomePageToken = 12;
+  EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  icing.InvalidateNextPageToken(kSomePageToken);  // Verify this doesn't crash.
+
+  // Maintenance operations.
+  EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(icing.Optimize().status(),
+              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+}
+
+// Verifies that after the index's "idx" subdirectory is deleted, Initialize()
+// succeeds and all three previously stored documents are searchable again.
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndex) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", kIpsumText)
+                               .Build();
+  // 1. Create an index with a LiteIndex that will only allow one document
+  // before needing a merge.
+  {
+    IcingSearchEngineOptions options = GetDefaultIcingOptions();
+    options.set_index_merge_size(document.ByteSizeLong());
+    IcingSearchEngine icing(options, GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+    // Add two documents. These should get merged into the main index.
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    // Add one document. This one should remain in the lite index.
+    document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  // 2. Delete the index file to trigger RestoreIndexIfNeeded. The result is
+  // asserted: if the delete fails, the rest of the test would pass without
+  // ever exercising restoration.
+  std::string idx_subdir = GetIndexDir() + "/idx";
+  ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+
+  // 3. Create the index again. This should trigger index restoration.
+  {
+    IcingSearchEngineOptions options = GetDefaultIcingOptions();
+    options.set_index_merge_size(document.ByteSizeLong());
+    IcingSearchEngine icing(options, GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    SearchSpecProto search_spec;
+    search_spec.set_query("consectetur");
+    search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+    SearchResultProto results =
+        icing.Search(search_spec, ScoringSpecProto::default_instance(),
+                     ResultSpecProto::default_instance());
+    EXPECT_THAT(results.status(), ProtoIsOk());
+    EXPECT_THAT(results.next_page_token(), Eq(0));
+    // All documents should be retrievable.
+    ASSERT_THAT(results.results(), SizeIs(3));
+    EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
+    EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1"));
+    EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0"));
+  }
+}
+
+// Verifies that when the document log is rewritten to contain only the first
+// two of three indexed documents, Initialize() succeeds and only those two
+// documents are returned by a search.
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseLiteIndex) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", kIpsumText)
+                               .Build();
+  // 1. Create an index with a LiteIndex that will only allow one document
+  // before needing a merge.
+  {
+    IcingSearchEngineOptions options = GetDefaultIcingOptions();
+    options.set_index_merge_size(document.ByteSizeLong());
+    IcingSearchEngine icing(options, GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+    // Add two documents. These should get merged into the main index.
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    // Add one document. This one should remain in the lite index.
+    document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  // 2. Delete the last document from the document log. This is done by
+  // deleting the log and re-creating it with only the first two documents.
+  {
+    const std::string document_log_file = absl_ports::StrCat(
+        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+    // The delete is asserted: if the old log survived, the re-created log
+    // would not be the two-document log this test relies on.
+    ASSERT_TRUE(filesystem()->DeleteFile(document_log_file.c_str()));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        auto create_result,
+        PortableFileBackedProtoLog<DocumentWrapper>::Create(
+            filesystem(), document_log_file.c_str(),
+            PortableFileBackedProtoLog<DocumentWrapper>::Options(
+                /*compress_in=*/true)));
+    std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
+        std::move(create_result.proto_log);
+
+    document = DocumentBuilder(document).SetUri("fake_type/0").Build();
+    DocumentWrapper wrapper;
+    *wrapper.mutable_document() = document;
+    ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+
+    document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+    *wrapper.mutable_document() = document;
+    ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+  }
+
+  // 3. Create the index again. This should throw out the lite index and trigger
+  // index restoration which will only restore the two documents in the main
+  // index.
+  {
+    IcingSearchEngineOptions options = GetDefaultIcingOptions();
+    options.set_index_merge_size(document.ByteSizeLong());
+    IcingSearchEngine icing(options, GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    SearchSpecProto search_spec;
+    search_spec.set_query("consectetur");
+    search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+    SearchResultProto results =
+        icing.Search(search_spec, ScoringSpecProto::default_instance(),
+                     ResultSpecProto::default_instance());
+    EXPECT_THAT(results.status(), ProtoIsOk());
+    EXPECT_THAT(results.next_page_token(), Eq(0));
+    // Only the documents that were in the main index should be retrievable.
+    ASSERT_THAT(results.results(), SizeIs(2));
+    EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1"));
+    EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0"));
+  }
+}
+
+// Verifies that when the document log is rewritten to contain only the first
+// of three indexed documents, Initialize() succeeds and only that document is
+// returned by a search.
+TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIndex) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", kIpsumText)
+                               .Build();
+  // 1. Create an index with a LiteIndex that will only allow one document
+  // before needing a merge.
+  {
+    IcingSearchEngineOptions options = GetDefaultIcingOptions();
+    options.set_index_merge_size(document.ByteSizeLong());
+    IcingSearchEngine icing(options, GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+    // Add two documents. These should get merged into the main index.
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+    // Add one document. This one should remain in the lite index.
+    document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  // 2. Delete the last two documents from the document log. This is done by
+  // deleting the log and re-creating it with only the first document.
+  {
+    const std::string document_log_file = absl_ports::StrCat(
+        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+    // The delete is asserted: if the old log survived, the re-created log
+    // would not be the single-document log this test relies on.
+    ASSERT_TRUE(filesystem()->DeleteFile(document_log_file.c_str()));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        auto create_result,
+        PortableFileBackedProtoLog<DocumentWrapper>::Create(
+            filesystem(), document_log_file.c_str(),
+            PortableFileBackedProtoLog<DocumentWrapper>::Options(
+                /*compress_in=*/true)));
+    std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
+        std::move(create_result.proto_log);
+
+    document = DocumentBuilder(document).SetUri("fake_type/0").Build();
+    DocumentWrapper wrapper;
+    *wrapper.mutable_document() = document;
+    ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+  }
+
+  // 3. Create the index again. This should throw out the lite and main index
+  // and trigger index restoration.
+  {
+    IcingSearchEngineOptions options = GetDefaultIcingOptions();
+    options.set_index_merge_size(document.ByteSizeLong());
+    IcingSearchEngine icing(options, GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    SearchSpecProto search_spec;
+    search_spec.set_query("consectetur");
+    search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+    SearchResultProto results =
+        icing.Search(search_spec, ScoringSpecProto::default_instance(),
+                     ResultSpecProto::default_instance());
+    EXPECT_THAT(results.status(), ProtoIsOk());
+    EXPECT_THAT(results.next_page_token(), Eq(0));
+    // Only the first document should be retrievable.
+    ASSERT_THAT(results.results(), SizeIs(1));
+    EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0"));
+  }
+}
+
+// Verifies that re-opening an engine whose only document has no indexed
+// property reports no data loss and no recovery or restoration cause.
+TEST_F(IcingSearchEngineInitializationTest,
+       DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) {
+  // Step 1: store a single document under a schema whose only property is
+  // declared with TERM_MATCH_UNKNOWN / TOKENIZER_NONE.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+    SchemaProto unindexed_schema =
+        SchemaBuilder()
+            .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+                PropertyConfigBuilder()
+                    .SetName("unindexedField")
+                    .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                    .SetCardinality(CARDINALITY_REQUIRED)))
+            .Build();
+    ASSERT_THAT(engine.SetSchema(unindexed_schema).status(), ProtoIsOk());
+
+    DocumentProto unindexed_doc =
+        DocumentBuilder()
+            .SetKey("icing", "fake_type/0")
+            .SetSchema("Message")
+            .AddStringProperty("unindexedField",
+                               "Don't you dare search over this!")
+            .Build();
+    EXPECT_THAT(engine.Put(unindexed_doc).status(), ProtoIsOk());
+  }
+
+  // Step 2: re-open. No recovery of any kind should be reported.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto reopened = engine.Initialize();
+    EXPECT_THAT(reopened.status(), ProtoIsOk());
+
+    const InitializeStatsProto& stats = reopened.initialize_stats();
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::NO_DATA_LOSS));
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::NONE));
+  }
+}
+
+// Verifies that re-opening an engine whose only document contains nothing but
+// punctuation in its "body" reports no data loss and no recovery or
+// restoration cause.
+TEST_F(IcingSearchEngineInitializationTest,
+       DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
+  // Step 1: store a single document whose content has no valid indexed
+  // tokens.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+    // Use the standard Message schema from the fixture helper.
+    ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+    // The body is just punctuation.
+    DocumentProto punctuation_doc = DocumentBuilder()
+                                        .SetKey("icing", "fake_type/0")
+                                        .SetSchema("Message")
+                                        .AddStringProperty("body", "?...!")
+                                        .Build();
+    EXPECT_THAT(engine.Put(punctuation_doc).status(), ProtoIsOk());
+  }
+
+  // Step 2: re-open. No recovery of any kind should be reported.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto reopened = engine.Initialize();
+    EXPECT_THAT(reopened.status(), ProtoIsOk());
+
+    const InitializeStatsProto& stats = reopened.initialize_stats();
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::NO_DATA_LOSS));
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::NONE));
+  }
+}
+
+// Verifies that the latency reported in the initialize stats equals the
+// elapsed time configured on the injected fake clock.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogFunctionLatency) {
+  std::unique_ptr<FakeClock> clock = std::make_unique<FakeClock>();
+  clock->SetTimerElapsedMilliseconds(10);
+
+  TestIcingSearchEngine engine(
+      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+      std::make_unique<IcingFilesystem>(), std::move(clock),
+      GetTestJniCache());
+
+  InitializeResultProto init_result = engine.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoIsOk());
+  EXPECT_THAT(init_result.initialize_stats().latency_ms(), Eq(10));
+}
+
+// Verifies that the num_documents value in the initialize stats is 0 on first
+// use and then 1 and 2 on the following re-opens, after one document has been
+// added in each preceding session.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogNumberOfDocuments) {
+  DocumentProto first_doc = DocumentBuilder()
+                                .SetKey("icing", "fake_type/1")
+                                .SetSchema("Message")
+                                .AddStringProperty("body", "message body")
+                                .Build();
+  DocumentProto second_doc = DocumentBuilder()
+                                 .SetKey("icing", "fake_type/2")
+                                 .SetSchema("Message")
+                                 .AddStringProperty("body", "message body")
+                                 .Build();
+
+  // Session 1: nothing stored yet; add the first document.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = engine.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(0));
+
+    ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(engine.Put(first_doc).status(), ProtoIsOk());
+  }
+
+  // Session 2: one document reported; add the second document.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = engine.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(1));
+
+    ASSERT_THAT(engine.Put(second_doc).status(), ProtoIsOk());
+  }
+
+  // Session 3: both documents reported.
+  {
+    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = engine.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(2));
+  }
+}
+
+// Verifies that a first-time Initialize() reports no recovery or restoration
+// cause, no data loss, and zero recovery/restoration latencies.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
+  // The fake timer reports 10ms for anything that is timed, so a latency of 0
+  // below means the corresponding recovery step was not timed at all.
+  std::unique_ptr<FakeClock> clock = std::make_unique<FakeClock>();
+  clock->SetTimerElapsedMilliseconds(10);
+  TestIcingSearchEngine engine(
+      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+      std::make_unique<IcingFilesystem>(), std::move(clock),
+      GetTestJniCache());
+
+  InitializeResultProto init_result = engine.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+  const InitializeStatsProto& stats = init_result.initialize_stats();
+
+  // Document store.
+  EXPECT_THAT(stats.document_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
+  EXPECT_THAT(stats.document_store_data_status(),
+              Eq(InitializeStatsProto::NO_DATA_LOSS));
+
+  // Index.
+  EXPECT_THAT(stats.index_restoration_cause(), Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
+
+  // Schema store.
+  EXPECT_THAT(stats.schema_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+}
+
+// Verifies that when extra, non-checksummed bytes are appended to the document
+// log, the next Initialize() succeeds and reports a DATA_LOSS recovery cause
+// with PARTIAL_LOSS data status for the document store, and no recovery for the
+// index or schema store.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCausePartialDataLoss) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", "message body")
+                               .Build();
+
+  {
+    // Initialize and put a document.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  {
+    // Append a non-checksummed document. This will mess up the checksum of the
+    // proto log, forcing it to rewind and later return a DATA_LOSS error.
+    const std::string serialized_document = document.SerializeAsString();
+    const std::string document_log_file = absl_ports::StrCat(
+        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+
+    int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
+    // The write is asserted: if nothing is appended, the log is not corrupted
+    // and the expectations below would fail for an unrelated reason.
+    ASSERT_TRUE(filesystem()->PWrite(document_log_file.c_str(), file_size,
+                                     serialized_document.data(),
+                                     serialized_document.size()));
+  }
+
+  {
+    // Document store will rewind to previous checkpoint. The cause should be
+    // DATA_LOSS and the data status should be PARTIAL_LOSS.
+    auto fake_clock = std::make_unique<FakeClock>();
+    fake_clock->SetTimerElapsedMilliseconds(10);
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(fake_clock), GetTestJniCache());
+    InitializeResultProto initialize_result_proto = icing.Initialize();
+    EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+    EXPECT_THAT(initialize_result_proto.initialize_stats()
+                    .document_store_recovery_cause(),
+                Eq(InitializeStatsProto::DATA_LOSS));
+    // The fake timer reports 10ms for the recovery step that actually ran.
+    EXPECT_THAT(initialize_result_proto.initialize_stats()
+                    .document_store_recovery_latency_ms(),
+                Eq(10));
+    EXPECT_THAT(
+        initialize_result_proto.initialize_stats().document_store_data_status(),
+        Eq(InitializeStatsProto::PARTIAL_LOSS));
+    EXPECT_THAT(
+        initialize_result_proto.initialize_stats().index_restoration_cause(),
+        Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(initialize_result_proto.initialize_stats()
+                    .index_restoration_latency_ms(),
+                Eq(0));
+    EXPECT_THAT(initialize_result_proto.initialize_stats()
+                    .schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(initialize_result_proto.initialize_stats()
+                    .schema_store_recovery_latency_ms(),
+                Eq(0));
+  }
+}
+
+// Verifies the stats logged by Initialize() after the document region of the
+// document log has been overwritten: the document store must report
+// DATA_LOSS / COMPLETE_LOSS, and the resulting index clearing must not be
+// reported as an index restoration.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCauseCompleteDataLoss) {
+  DocumentProto original_document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message body")
+          .Build();
+
+  const std::string log_path = absl_ports::StrCat(
+      GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+  int64_t corruptible_offset;
+
+  {
+    // Bring up an engine and store one document.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // The start of the file (e.g. header, kmagic, etc) is needed to
+    // initialize the FileBackedProtoLog and must not be corrupted. Documents
+    // are written after that region, so the file size right after
+    // initialization is the first offset that is safe to overwrite.
+    corruptible_offset = filesystem()->GetFileSize(log_path.c_str());
+
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    EXPECT_THAT(icing.Put(original_document).status(), ProtoIsOk());
+  }
+
+  {
+    // Overwrite the stored content with a different document. The replacement
+    // is smaller than the original so the write never runs past the end of
+    // the file.
+    DocumentProto bogus_document =
+        DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+    std::string bogus_bytes = bogus_document.SerializeAsString();
+    ASSERT_TRUE(filesystem()->PWrite(log_path.c_str(), corruptible_offset,
+                                     bogus_bytes.data(), bogus_bytes.size()));
+
+    PortableFileBackedProtoLog<DocumentWrapper>::Header log_header =
+        ReadDocumentLogHeader(*filesystem(), log_path);
+
+    // Mark the log dirty to reflect that something changed in it, then
+    // refresh the header checksum so the header itself stays valid.
+    log_header.SetDirtyFlag(true);
+    log_header.SetHeaderChecksum(log_header.CalculateHeaderChecksum());
+
+    WriteDocumentLogHeader(*filesystem(), log_path, log_header);
+  }
+
+  {
+    // Document store will completely rewind. The cause should be DATA_LOSS
+    // and the data status should be COMPLETE_LOSS.
+    auto timer_clock = std::make_unique<FakeClock>();
+    timer_clock->SetTimerElapsedMilliseconds(10);
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(timer_clock), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+    const auto& stats = init_result.initialize_stats();
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::DATA_LOSS));
+    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(10));
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::COMPLETE_LOSS));
+    // The complete rewind of ground truth causes us to clear the index, but
+    // that's not considered a restoration.
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+  }
+}
+
+// Verifies the stats logged by Initialize() when the index directory has been
+// removed while the document store still holds a document: the index must
+// report INCONSISTENT_WITH_GROUND_TRUTH, everything else must report no
+// recovery.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", "message body")
+                               .Build();
+  {
+    // Initialize and put a document.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  {
+    // Delete the index file to trigger RestoreIndexIfNeeded. The deletion is
+    // asserted because the rest of the test is meaningless if the index is
+    // still on disk.
+    std::string idx_subdir = GetIndexDir() + "/idx";
+    ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+  }
+
+  {
+    // Index is empty but ground truth is not. Index should be restored due to
+    // the inconsistency.
+    auto fake_clock = std::make_unique<FakeClock>();
+    // Every timer created from this clock reports 10ms, so a step that ran
+    // shows up with a latency of exactly 10.
+    fake_clock->SetTimerElapsedMilliseconds(10);
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(fake_clock), GetTestJniCache());
+    InitializeResultProto initialize_result_proto = icing.Initialize();
+    EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+
+    const auto& stats = initialize_result_proto.initialize_stats();
+    // Index: restored because it disagreed with the document store.
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
+    // Document store: untouched, nothing lost.
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::NO_DATA_LOSS));
+    // Schema store: untouched.
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+  }
+}
+
+// Verifies the stats logged by Initialize() when a set_schema_marker file is
+// present and the schema store already holds a newer schema: both the
+// document store and the index must report SCHEMA_CHANGES_OUT_OF_SYNC, and a
+// subsequent Initialize() must report no recovery at all.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", "message body")
+                               .Build();
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  {
+    // Start from a healthy engine holding a single document.
+    IcingSearchEngine icing(options, GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  {
+    // Simulate a schema change where power is lost after the schema is
+    // written. The new schema adds an optional "subject" property to
+    // "Message".
+    SchemaProto updated_schema =
+        SchemaBuilder()
+            .AddType(
+                SchemaTypeConfigBuilder()
+                    .SetType("Message")
+                    .AddProperty(
+                        PropertyConfigBuilder()
+                            .SetName("body")
+                            .SetDataTypeString(TERM_MATCH_PREFIX,
+                                               TOKENIZER_PLAIN)
+                            .SetCardinality(CARDINALITY_REQUIRED))
+                    .AddProperty(
+                        PropertyConfigBuilder()
+                            .SetName("subject")
+                            .SetDataTypeString(TERM_MATCH_PREFIX,
+                                               TOKENIZER_PLAIN)
+                            .SetCardinality(CARDINALITY_OPTIONAL)))
+            .Build();
+    // Leave the marker file behind in the base directory.
+    std::string marker_path =
+        absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+    ScopedFd marker_fd(filesystem()->OpenForWrite(marker_path.c_str()));
+    ASSERT_TRUE(marker_fd.is_valid());
+
+    // Persist the new schema directly through a SchemaStore, bypassing the
+    // engine.
+    FakeClock schema_clock;
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> schema_store,
+        SchemaStore::Create(filesystem(), GetSchemaDir(), &schema_clock));
+    ICING_EXPECT_OK(schema_store->SetSchema(updated_schema));
+  }
+
+  {
+    // Both the document store and the index should report that they were
+    // recovered because schema changes were out of sync.
+    auto timer_clock = std::make_unique<FakeClock>();
+    timer_clock->SetTimerElapsedMilliseconds(10);
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(timer_clock), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+    const auto& stats = init_result.initialize_stats();
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(10));
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::NO_DATA_LOSS));
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+  }
+
+  {
+    // A second initialization should find nothing left to recover.
+    auto timer_clock = std::make_unique<FakeClock>();
+    timer_clock->SetTimerElapsedMilliseconds(10);
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(timer_clock), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+    const auto& stats = init_result.initialize_stats();
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::NO_DATA_LOSS));
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+  }
+}
+
+// Verifies the stats logged by Initialize() when the first OpenForWrite on the
+// lite index hit buffer fails: the index must report IO_ERROR, everything
+// else must report no recovery.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCauseIndexIOError) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", "message body")
+                               .Build();
+  {
+    // Start from a healthy engine holding a single document.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  // Stand-in for OpenForWrite: the first request for the lite index hit
+  // buffer returns -1, every other request is forwarded to the fixture's
+  // filesystem.
+  bool has_failed_already = false;
+  auto open_write_lambda = [this, &has_failed_already](const char* filename) {
+    const std::string hit_buffer_path =
+        absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
+    const std::string requested_path(filename);
+    if (has_failed_already || requested_path != hit_buffer_path) {
+      return this->filesystem()->OpenForWrite(filename);
+    }
+    has_failed_already = true;
+    return -1;
+  };
+
+  auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+  // This fails Index::Create() once.
+  ON_CALL(*mock_icing_filesystem, OpenForWrite)
+      .WillByDefault(open_write_lambda);
+
+  auto timer_clock = std::make_unique<FakeClock>();
+  timer_clock->SetTimerElapsedMilliseconds(10);
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::make_unique<Filesystem>(),
+                              std::move(mock_icing_filesystem),
+                              std::move(timer_clock), GetTestJniCache());
+
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+  const auto& stats = init_result.initialize_stats();
+  EXPECT_THAT(stats.index_restoration_cause(),
+              Eq(InitializeStatsProto::IO_ERROR));
+  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
+  EXPECT_THAT(stats.document_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
+  EXPECT_THAT(stats.document_store_data_status(),
+              Eq(InitializeStatsProto::NO_DATA_LOSS));
+  EXPECT_THAT(stats.schema_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+}
+
+// Verifies the stats logged by Initialize() when the first Read of the
+// document store header fails: the document store must report IO_ERROR with
+// no data loss, everything else must report no recovery.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCauseDocStoreIOError) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/0")
+                               .SetSchema("Message")
+                               .AddStringProperty("body", "message body")
+                               .Build();
+  {
+    // Start from a healthy engine holding a single document.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  // Stand-in for Read: the first request for the document store header
+  // returns false, every other request is forwarded to the fixture's
+  // filesystem.
+  bool has_failed_already = false;
+  auto read_lambda = [this, &has_failed_already](const char* filename,
+                                                 void* buf, size_t buf_size) {
+    const std::string header_path =
+        absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
+    const std::string requested_path(filename);
+    if (has_failed_already || requested_path != header_path) {
+      return this->filesystem()->Read(filename, buf, buf_size);
+    }
+    has_failed_already = true;
+    return false;
+  };
+
+  auto mock_filesystem = std::make_unique<MockFilesystem>();
+  // This fails DocumentStore::InitializeDerivedFiles() once.
+  ON_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
+      .WillByDefault(read_lambda);
+
+  auto timer_clock = std::make_unique<FakeClock>();
+  timer_clock->SetTimerElapsedMilliseconds(10);
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::move(mock_filesystem),
+                              std::make_unique<IcingFilesystem>(),
+                              std::move(timer_clock), GetTestJniCache());
+
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+  const auto& stats = init_result.initialize_stats();
+  EXPECT_THAT(stats.document_store_recovery_cause(),
+              Eq(InitializeStatsProto::IO_ERROR));
+  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(10));
+  EXPECT_THAT(stats.document_store_data_status(),
+              Eq(InitializeStatsProto::NO_DATA_LOSS));
+  EXPECT_THAT(stats.index_restoration_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
+  EXPECT_THAT(stats.schema_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
+}
+
+// Verifies the stats logged by Initialize() after the schema store header
+// file has been deleted: the schema store must report IO_ERROR, everything
+// else must report no recovery.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
+  {
+    // Create a schema store on disk by setting a schema.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+  }
+
+  {
+    // Delete the schema store header file to trigger an I/O error. The
+    // deletion is asserted because the rest of the test is meaningless if the
+    // header is still on disk.
+    std::string schema_store_header_file_path =
+        GetSchemaDir() + "/schema_store_header";
+    ASSERT_TRUE(
+        filesystem()->DeleteFile(schema_store_header_file_path.c_str()));
+  }
+
+  {
+    auto fake_clock = std::make_unique<FakeClock>();
+    // Every timer created from this clock reports 10ms, so a step that ran
+    // shows up with a latency of exactly 10.
+    fake_clock->SetTimerElapsedMilliseconds(10);
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(fake_clock), GetTestJniCache());
+    InitializeResultProto initialize_result_proto = icing.Initialize();
+    EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+
+    const auto& stats = initialize_result_proto.initialize_stats();
+    // Schema store: recovered from an I/O error.
+    EXPECT_THAT(stats.schema_store_recovery_cause(),
+                Eq(InitializeStatsProto::IO_ERROR));
+    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(10));
+    // Document store: untouched, nothing lost.
+    EXPECT_THAT(stats.document_store_recovery_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
+    EXPECT_THAT(stats.document_store_data_status(),
+                Eq(InitializeStatsProto::NO_DATA_LOSS));
+    // Index: untouched.
+    EXPECT_THAT(stats.index_restoration_cause(),
+                Eq(InitializeStatsProto::NONE));
+    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
+  }
+}
+
+// Verifies that Initialize() logs the number of schema types that were set
+// before the engine was last destroyed: 0, then 1, then 2.
+TEST_F(IcingSearchEngineInitializationTest,
+       InitializeShouldLogNumberOfSchemaTypes) {
+  {
+    // Fresh storage: no schema has ever been set.
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+    // There should be 0 schema types.
+    EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(0));
+
+    // Set a schema with one type config.
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+  }
+
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+    // There should be 1 schema type.
+    EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(1));
+
+    // Create and set a schema with two type configs: Email and Message.
+    SchemaProto two_type_schema = CreateEmailSchema();
+
+    auto* message_type = two_type_schema.add_types();
+    message_type->set_schema_type("Message");
+
+    auto* body_property = message_type->add_properties();
+    body_property->set_property_name("body");
+    body_property->set_data_type(PropertyConfigProto::DataType::STRING);
+    body_property->set_cardinality(
+        PropertyConfigProto::Cardinality::REQUIRED);
+
+    auto* body_indexing = body_property->mutable_string_indexing_config();
+    body_indexing->set_term_match_type(TermMatchType::PREFIX);
+    body_indexing->set_tokenizer_type(
+        StringIndexingConfig::TokenizerType::PLAIN);
+
+    ASSERT_THAT(icing.SetSchema(two_type_schema).status(), ProtoIsOk());
+  }
+
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    InitializeResultProto init_result = icing.Initialize();
+    EXPECT_THAT(init_result.status(), ProtoIsOk());
+    // There should be 2 schema types.
+    EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(2));
+  }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
new file mode 100644
index 0000000..da02c4a
--- /dev/null
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -0,0 +1,974 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::Lt;
+using ::testing::Return;
+
+// IcingSearchEngine subclass whose only addition is a public constructor that
+// forwards a caller-supplied Filesystem, IcingFilesystem, Clock and JniCache
+// to the base class, so that tests can inject mocks and fakes.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+  TestIcingSearchEngine(
+      const IcingSearchEngineOptions& options,
+      std::unique_ptr<const Filesystem> filesystem,
+      std::unique_ptr<const IcingFilesystem> icing_filesystem,
+      std::unique_ptr<Clock> clock, std::unique_ptr<JniCache> jni_cache)
+      : IcingSearchEngine(options,
+                          std::move(filesystem),
+                          std::move(icing_filesystem),
+                          std::move(clock),
+                          std::move(jni_cache)) {}
+};
+
+// Returns the directory used as the engine's base_dir by the tests in this
+// file: the "icing" subdirectory of the test temp directory.
+std::string GetTestBaseDir() {
+  std::string base_dir = GetTestTempDir();
+  base_dir += "/icing";
+  return base_dir;
+}
+
+// Fixture for all tests relating to IcingSearchEngine::Optimize. It creates
+// the test base directory before each test and removes it afterwards.
+class IcingSearchEngineOptimizeTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    // When segmentation goes through CFString or reverse-JNI (i.e. not ICU),
+    // no ICU data file is bundled with the test, so there is nothing to set
+    // up. Using reverse-JNI together with an ICU data file would technically
+    // be possible, but that seems unlikely and the current BUILD setup doesn't
+    // do it.
+    const bool uses_icu_segmentation =
+        !(IsCfStringTokenization() || IsReverseJniTokenization());
+    if (uses_icu_segmentation) {
+      // File generated via icu_data_file rule in //icing/BUILD.
+      std::string icu_data_path = GetTestFilePath("icing/icu.dat");
+      ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(icu_data_path));
+    }
+    const std::string base_dir = GetTestBaseDir();
+    filesystem_.CreateDirectoryRecursively(base_dir.c_str());
+  }
+
+  void TearDown() override {
+    const std::string base_dir = GetTestBaseDir();
+    filesystem_.DeleteDirectoryRecursively(base_dir.c_str());
+  }
+
+  // Accessor for the real filesystem owned by the fixture.
+  const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+  Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+// Returns engine options whose only explicitly set field is base_dir, which
+// points at the test base directory.
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+  IcingSearchEngineOptions options;
+  const std::string base_dir = GetTestBaseDir();
+  options.set_base_dir(base_dir);
+  return options;
+}
+
+// Builds a "Message" document keyed by (name_space, uri) with the body
+// "message body" and the fixed creation timestamp
+// kDefaultCreationTimestampMs.
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+  DocumentBuilder builder;
+  builder.SetKey(std::move(name_space), std::move(uri));
+  builder.SetSchema("Message");
+  builder.AddStringProperty("body", "message body");
+  builder.SetCreationTimestampMs(kDefaultCreationTimestampMs);
+  return builder.Build();
+}
+
+// Builds a schema with a single type "Message" that has one required string
+// property "body", indexed with prefix term matching and the plain tokenizer.
+SchemaProto CreateMessageSchema() {
+  PropertyConfigBuilder body_property;
+  body_property.SetName("body")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_REQUIRED);
+
+  SchemaTypeConfigBuilder message_type;
+  message_type.SetType("Message").AddProperty(body_property);
+
+  return SchemaBuilder().AddType(message_type).Build();
+}
+
+// Returns a scoring spec that ranks results by DOCUMENT_SCORE.
+ScoringSpecProto GetDefaultScoringSpec() {
+  ScoringSpecProto spec;
+  constexpr auto kRankBy = ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE;
+  spec.set_rank_by(kRankBy);
+  return spec;
+}
+
+// Verifies that a next-page token obtained before Optimize() yields an empty
+// result (no documents, no next-page token) when used afterwards.
+TEST_F(IcingSearchEngineOptimizeTest,
+       AllPageTokensShouldBeInvalidatedAfterOptimization) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto first_doc = CreateMessageDocument("namespace", "uri1");
+  DocumentProto second_doc = CreateMessageDocument("namespace", "uri2");
+  ASSERT_THAT(icing.Put(first_doc).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(second_doc).status(), ProtoIsOk());
+
+  // Both documents match "message"; one result per page forces pagination.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("message");
+
+  ResultSpecProto result_spec;
+  result_spec.set_num_per_page(1);
+
+  // First page: exactly one result, the document that was put last.
+  SearchResultProto expected_page;
+  expected_page.mutable_status()->set_code(StatusProto::OK);
+  *expected_page.mutable_results()->Add()->mutable_document() = second_doc;
+
+  SearchResultProto actual_page =
+      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+  EXPECT_THAT(actual_page.next_page_token(), Gt(kInvalidNextPageToken));
+  uint64_t page_token = actual_page.next_page_token();
+  // The token is a random number, so copy it into the expectation instead of
+  // verifying its value.
+  expected_page.set_next_page_token(page_token);
+  EXPECT_THAT(actual_page,
+              EqualsSearchResultIgnoreStatsAndScores(expected_page));
+  // At this point the first document is still to be fetched.
+
+  // Optimize and make sure it succeeded; stats are cleared before comparing.
+  OptimizeResultProto expected_optimize_result;
+  expected_optimize_result.mutable_status()->set_code(StatusProto::OK);
+  expected_optimize_result.mutable_status()->set_message("");
+  OptimizeResultProto actual_optimize_result = icing.Optimize();
+  actual_optimize_result.clear_optimize_stats();
+  ASSERT_THAT(actual_optimize_result, EqualsProto(expected_optimize_result));
+
+  // Tries to fetch the second page, no results since all tokens have been
+  // invalidated during Optimize()
+  expected_page.clear_results();
+  expected_page.clear_next_page_token();
+  actual_page = icing.GetNextPage(page_token);
+  EXPECT_THAT(actual_page,
+              EqualsSearchResultIgnoreStatsAndScores(expected_page));
+}
+
+// Verifies that a deleted document stays NOT_FOUND after Optimize(), that the
+// document log shrinks, and that the document is still NOT_FOUND after the
+// engine is recreated.
+TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+
+  DocumentProto doomed_doc = CreateMessageDocument("namespace", "uri1");
+
+  // What Get() is expected to return once the document is gone.
+  GetResultProto not_found_result;
+  not_found_result.mutable_status()->set_code(StatusProto::NOT_FOUND);
+  not_found_result.mutable_status()->set_message(
+      "Document (namespace, uri1) not found.");
+  {
+    IcingSearchEngine icing(options, GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(doomed_doc).status(), ProtoIsOk());
+
+    // Delete the document, then measure the log around Optimize().
+    ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+    const std::string log_path =
+        options.base_dir() + "/document_dir/" +
+        DocumentLogCreator::GetDocumentLogFilename();
+    int64_t size_before = filesystem()->GetFileSize(log_path.c_str());
+    ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+    int64_t size_after = filesystem()->GetFileSize(log_path.c_str());
+
+    // Validates that document can't be found right after Optimize()
+    EXPECT_THAT(
+        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+        EqualsProto(not_found_result));
+    // Validates that document is actually removed from document log
+    EXPECT_THAT(size_after, Lt(size_before));
+  }  // Destroys IcingSearchEngine to make sure nothing is cached.
+
+  IcingSearchEngine icing(options, GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(not_found_result));
+}
+
+// Verifies that a pre-existing "document_dir_optimize_tmp" directory (and the
+// file inside it) is gone after Optimize() succeeds.
+TEST_F(IcingSearchEngineOptimizeTest,
+       OptimizationShouldDeleteTemporaryDirectory) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  IcingSearchEngine icing(options, GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Create a tmp dir that will be used in Optimize() to swap files,
+  // this validates that any tmp dirs will be deleted before using.
+  const std::string stale_dir =
+      options.base_dir() + "/document_dir_optimize_tmp";
+  const std::string stale_file = stale_dir + "/file";
+
+  ASSERT_TRUE(filesystem()->CreateDirectory(stale_dir.c_str()));
+  // Put a small file into the directory so that it is not empty.
+  ScopedFd stale_fd(filesystem()->OpenForWrite(stale_file.c_str()));
+  ASSERT_TRUE(stale_fd.is_valid());
+  ASSERT_TRUE(filesystem()->Write(stale_fd.get(), "1234", 4));
+  stale_fd.reset();
+
+  EXPECT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+  EXPECT_FALSE(filesystem()->DirectoryExists(stale_dir.c_str()));
+  EXPECT_FALSE(filesystem()->FileExists(stale_file.c_str()));
+}
+
+// Verifies the numbers reported by GetOptimizeInfo() as documents are added,
+// deleted and expired, and that time_since_last_optimize_ms reflects the
+// clock difference after Optimize() and a restart.
+TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) {
+  DocumentProto live_doc = CreateMessageDocument("namespace", "uri1");
+  DocumentProto expired_doc = DocumentBuilder()
+                                  .SetKey("namespace", "uri2")
+                                  .SetSchema("Message")
+                                  .AddStringProperty("body", "message body")
+                                  .SetCreationTimestampMs(100)
+                                  .SetTtlMs(500)
+                                  .Build();
+
+  {
+    auto system_clock = std::make_unique<FakeClock>();
+    system_clock->SetSystemTimeMilliseconds(1000);
+
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(system_clock), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // Just initialized, nothing is optimizable yet.
+    GetOptimizeInfoResultProto info = icing.GetOptimizeInfo();
+    EXPECT_THAT(info.status(), ProtoIsOk());
+    EXPECT_THAT(info.optimizable_docs(), Eq(0));
+    EXPECT_THAT(info.estimated_optimizable_bytes(), Eq(0));
+    EXPECT_THAT(info.time_since_last_optimize_ms(), Eq(0));
+
+    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+    ASSERT_THAT(icing.Put(live_doc).status(), ProtoIsOk());
+
+    // Only have active documents, nothing is optimizable yet.
+    info = icing.GetOptimizeInfo();
+    EXPECT_THAT(info.status(), ProtoIsOk());
+    EXPECT_THAT(info.optimizable_docs(), Eq(0));
+    EXPECT_THAT(info.estimated_optimizable_bytes(), Eq(0));
+    EXPECT_THAT(info.time_since_last_optimize_ms(), Eq(0));
+
+    // Delete the first document: one optimizable doc, some optimizable bytes.
+    ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+
+    info = icing.GetOptimizeInfo();
+    EXPECT_THAT(info.status(), ProtoIsOk());
+    EXPECT_THAT(info.optimizable_docs(), Eq(1));
+    EXPECT_THAT(info.estimated_optimizable_bytes(), Gt(0));
+    EXPECT_THAT(info.time_since_last_optimize_ms(), Eq(0));
+    int64_t bytes_after_delete = info.estimated_optimizable_bytes();
+
+    // Add a second document, but it'll be expired since the time (1000) is
+    // greater than the document's creation timestamp (100) + the document's
+    // ttl (500)
+    ASSERT_THAT(icing.Put(expired_doc).status(), ProtoIsOk());
+
+    info = icing.GetOptimizeInfo();
+    EXPECT_THAT(info.status(), ProtoIsOk());
+    EXPECT_THAT(info.optimizable_docs(), Eq(2));
+    EXPECT_THAT(info.estimated_optimizable_bytes(), Gt(bytes_after_delete));
+    EXPECT_THAT(info.time_since_last_optimize_ms(), Eq(0));
+
+    // Optimize
+    ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+  }
+
+  {
+    // Recreate with new time
+    auto system_clock = std::make_unique<FakeClock>();
+    system_clock->SetSystemTimeMilliseconds(5000);
+
+    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                                std::make_unique<Filesystem>(),
+                                std::make_unique<IcingFilesystem>(),
+                                std::move(system_clock), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // Nothing is optimizable now that everything has been optimized away.
+    // The engine optimized at time 1000 and now runs at 5000, hence 4000.
+    GetOptimizeInfoResultProto info = icing.GetOptimizeInfo();
+    EXPECT_THAT(info.status(), ProtoIsOk());
+    EXPECT_THAT(info.optimizable_docs(), Eq(0));
+    EXPECT_THAT(info.estimated_optimizable_bytes(), Eq(0));
+    EXPECT_THAT(info.time_since_last_optimize_ms(), Eq(4000));
+  }
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) {  // Get()/Put() after Optimize(), on the same and on a new instance.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+
+ GetResultProto expected_get_result_proto;  // Reused below; only its document field is swapped per check.
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Get() and Put() work right after Optimize(); uri2 was deleted.
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());  // Second instance, same default options.
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ *expected_get_result_proto.mutable_document() = document3;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ *expected_get_result_proto.mutable_document() = document4;  // document4 was Put() after Optimize() in the first instance.
+ EXPECT_THAT(
+ icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) {  // Get()/Put() after Optimize() when every document's "body" is "".
+ DocumentProto empty_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto empty_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto empty_document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());  // uri2 is deleted before Optimize() runs.
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Get() and Put() work right after Optimize(): uri1 is kept, uri2 is gone.
+ *expected_get_result_proto.mutable_document() = empty_document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
+ .status()
+ .code(),
+ Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, DeleteShouldWorkAfterOptimization) {  // Delete() after Optimize(), on the same and on a new instance.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Delete() works right after Optimize()
+ EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;  // The whole proto is compared, including the status message.
+ expected_get_result_proto.mutable_status()->set_code(
+ StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);  // Reuse the proto for the OK case: document2 was not deleted.
+ expected_get_result_proto.mutable_status()->clear_message();
+ *expected_get_result_proto.mutable_document() = document2;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
+
+ GetResultProto expected_get_result_proto;  // Both documents are deleted by now, so both Gets expect NOT_FOUND.
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri1) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri2) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizationFailureUninitializesIcing) {  // A failed Optimize() must leave Icing unusable until Reset().
+ // Set up a mock filesystem: SwapFiles() fails, then the next CreateDirectoryRecursively() fails too.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ bool just_swapped_files = false;  // Shared by both lambdas below; set by swap_lambda, cleared by create_dir_lambda.
+ auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) {
+ if (just_swapped_files) {
+ // We should fail the first call immediately after swapping files.
+ just_swapped_files = false;
+ return false;
+ }
+ return filesystem()->CreateDirectoryRecursively(dir_name);  // Otherwise delegate to the fixture's filesystem().
+ };
+ ON_CALL(*mock_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(create_dir_lambda);
+
+ auto swap_lambda = [&just_swapped_files](const char* first_dir,
+ const char* second_dir) {
+ just_swapped_files = true;  // Arms the one-shot failure in create_dir_lambda.
+ return false;
+ };
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault(swap_lambda);
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // The mocks should cause an unrecoverable error during Optimize - returning
+ // INTERNAL.
+ ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
+
+ // Ordinary operations should fail safely, each with FAILED_PRECONDITION.
+ SchemaProto simple_schema;
+ auto type = simple_schema.add_types();
+ type->set_schema_type("type0");
+ auto property = type->add_properties();
+ property->set_property_name("prop0");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ DocumentProto simple_doc = DocumentBuilder()
+ .SetKey("namespace0", "uri0")
+ .SetSchema("type0")
+ .AddStringProperty("prop0", "foo")
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ ResultSpecProto result_spec;
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Put(simple_doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing
+ .Get(simple_doc.namespace_(), simple_doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ // Reset should get icing back to a safe (empty) and working state.
+ EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
+ EXPECT_THAT(icing
+ .Get(simple_doc.namespace_(), simple_doc.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoIsOk());
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoIsOk());
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) {  // SetSchema() after Optimize(), on the same and on a new instance.
+ // Creates 3 test schemas: schema2 = schema1 + "property2", schema3 = schema2 + "property3".
+ SchemaProto schema1 = SchemaProto(CreateMessageSchema());
+
+ SchemaProto schema2 = SchemaProto(schema1);
+ auto new_property2 = schema2.mutable_types(0)->add_properties();
+ new_property2->set_property_name("property2");
+ new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
+ new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ new_property2->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ new_property2->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ SchemaProto schema3 = SchemaProto(schema2);
+ auto new_property3 = schema3.mutable_types(0)->add_properties();
+ new_property3->set_property_name("property3");
+ new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
+ new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ new_property3->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ new_property3->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that SetSchema() works right after Optimize()
+ EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk());
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk());  // A further schema update must also succeed on the new instance.
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) {  // Search() after Optimize(), on the same and on a new instance.
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");  // PREFIX query; the test expects it to return `document`.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+
+ // Validates that Search() works right after Optimize()
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());  // Same query and expectation against a second instance.
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ IcingShouldWorkFineIfOptimizationIsAborted) {  // After an ABORTED Optimize(), Get/Put/Search must still work.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ {
+ // Initializes a normal icing to create the files needed
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ }
+
+ // Creates a mock filesystem in which DeleteDirectoryRecursively() fails for
+ // any path containing "_optimize_tmp". This is meant to fail
+ // IcingSearchEngine::OptimizeDocumentStore() so Optimize() returns ABORTED.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("_optimize_tmp")))
+ .WillByDefault(Return(false));
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED));
+
+ // Now that optimization is aborted, we verify that document-related
+ // functions still work as expected.
+
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document1;
+ EXPECT_THAT(
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("m");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);  // Expected result order: document2, then document1.
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldRecoverIfFileDirectoriesAreMissing) {  // Optimize() loses both document directories; Icing must stay usable.
+ // Creates a mock filesystem in which SwapFiles() always fails and deletes both
+ // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault([this](const char* one, const char* two) {
+ filesystem()->DeleteDirectoryRecursively(one);  // Deletions go through the fixture's filesystem(), not the mock.
+ filesystem()->DeleteDirectoryRecursively(two);
+ return false;
+ });
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ // Optimize() fails due to the filesystem error and reports WARNING_DATA_LOSS
+ OptimizeResultProto result = icing.Optimize();
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ // Should rebuild the index for data loss.
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
+ Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
+
+ // Document is not found because original file directory is missing
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto new_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "new body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());  // Writes must work again after the failed Optimize().
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("m");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Searching old content returns nothing because original file directory is
+ // missing
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec.set_query("n");  // Prefix of "new body", the body of new_document.
+
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ new_document;
+
+ // Searching new content returns the new document
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest,
+ OptimizationShouldRecoverIfDataFilesAreMissing) {  // Optimize() leaves both document directories empty; Icing must stay usable.
+ // Creates a mock filesystem in which SwapFiles() always fails and empties both
+ // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
+ HasSubstr("document_dir")))
+ .WillByDefault([this](const char* one, const char* two) {
+ filesystem()->DeleteDirectoryRecursively(one);  // Delete then recreate, so each directory exists but is empty.
+ filesystem()->CreateDirectoryRecursively(one);
+ filesystem()->DeleteDirectoryRecursively(two);
+ filesystem()->CreateDirectoryRecursively(two);
+ return false;
+ });
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ // Optimize() fails due to the filesystem error and reports WARNING_DATA_LOSS
+ OptimizeResultProto result = icing.Optimize();
+ EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+ // Should rebuild the index for data loss.
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
+ Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
+
+ // Document is not found because original files are missing
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ DocumentProto new_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "new body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());  // Writes must work again after the failed Optimize().
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("m");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Searching old content returns nothing because original files are missing
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ search_spec.set_query("n");  // Prefix of "new body", the body of new_document.
+
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ new_document;
+
+ // Searching new content returns the new document
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) {  // Checks the OptimizeStatsProto returned by three successive Optimize() calls.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);  // Every *_latency_ms field below is expected to equal this value.
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create three documents.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ document2.set_creation_timestamp_ms(9000);
+ document2.set_ttl_ms(500);  // 9000 + 500 < 10000 (system time), so document2 counts as expired.
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;  // First Optimize(): 3 documents, of which 1 deleted and 1 expired.
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+ expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();  // The 1-block allowance described above is taken as one page.
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();  // Sizes were checked above; clear them so EqualsProto ignores them.
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();  // New clock for a second engine instance.
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);  // 10000 ms after the first instance's clock.
+ icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);  // Presumably only document3 is left after the first Optimize().
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(10000);  // 20000 - 10000, the two fake system times.
+ expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ // Delete the last document.
+ ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
+ ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(0);  // The fake system time has not changed since the previous Optimize().
+ // Should rebuild the index since all documents are removed.
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_put_test.cc b/icing/icing-search-engine_put_test.cc
new file mode 100644
index 0000000..ed72f17
--- /dev/null
+++ b/icing/icing-search-engine_put_test.cc
@@ -0,0 +1,481 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";  // Filler text used as the "body" of documents in the index-merge tests.
+
+// For mocking purposes, tests may supply their own Filesystem, IcingFilesystem, Clock and JniCache.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}  // Only forwards to the base-class constructor.
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }  // Created in SetUp, deleted in TearDown.
+
+// Fixture for all tests relating to IcingSearchEngine::Put.
+class IcingSearchEnginePutTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());  // Runs for every tokenization mode.
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());  // Drops whatever the test left under the base dir.
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }  // Non-owning; valid for the fixture's lifetime.
+
+ private:
+ Filesystem filesystem_;
+};
+
+constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;  // 2^24 - 1 = 16,777,215.
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;  // presumably epoch millis (2019-12-04 UTC) - confirm
+
+std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }  // Parent of the "idx" subdirectory one test deletes.
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());  // The only option set here; tests override others as needed.
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {  // "Message" document whose body is "message body".
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)  // Fixed non-zero timestamp, same for every document built here.
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {  // One type, "Message", with a single required string property, "body".
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {  // Scoring spec that ranks by DOCUMENT_SCORE.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+TEST_F(IcingSearchEnginePutTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // A length of 1 is allowed - even though it would be strange to want
+ // this.
+ options.set_max_token_length(1);
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");  // Body is "message body".
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // "message" should have been truncated to "m"
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ // The indexed tokens were truncated to length of 1, so "m" will match
+ search_spec.set_query("m");
+
+ SearchResultProto expected_search_result_proto;  // Reused for all three queries below.
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // The query token is also truncated to length of 1, so "me"->"m" matches "m"
+ search_spec.set_query("me");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // The query token is still truncated to length of 1, so "massage"->"m"
+ // matches "m"
+ search_spec.set_query("massage");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEnginePutTest,
+ MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // Set token length to max. This is allowed (it just means never to
+ // truncate tokens). However, this does mean that tokens that exceed the
+ // size of the lexicon will cause indexing to fail.
+ options.set_max_token_length(std::numeric_limits<int32_t>::max());
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add a document that just barely fits under the max document limit.
+ // This will still fail to index because we won't actually have enough
+ // room in the lexicon to fit this content.
+ std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');  // A single token made only of 'p'.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", std::move(enormous_string))
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));  // NOTE(review): test name says ResourceExhausted; the asserted code is OUT_OF_SPACE.
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("p");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);  // No results added: the failed Put must not be searchable.
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutWithoutSchemaFailedPrecondition) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());  // Initialize only; SetSchema is deliberately never called.
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
+}
+
+TEST_F(IcingSearchEnginePutTest, IndexingDocMergeFailureResets) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ // Add one document. This one should remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the index directory to trigger RestoreIndexIfNeeded.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+
+ // 3. Setup a mock filesystem to fail to grow the main index once.
+ bool has_failed_already = false;
+ auto open_write_lambda = [this, &has_failed_already](const char* filename) {
+ std::string main_lexicon_suffix = "/main-lexicon.prop.2";
+ std::string filename_string(filename);
+ if (!has_failed_already &&
+ filename_string.length() >= main_lexicon_suffix.length() &&
+ filename_string.substr(
+ filename_string.length() - main_lexicon_suffix.length(),
+ main_lexicon_suffix.length()) == main_lexicon_suffix) {  // i.e. filename ends with the suffix.
+ has_failed_already = true;
+ return -1;  // Simulated failure; the flag above limits it to the first matching open.
+ }
+ return this->filesystem()->OpenForWrite(filename);  // Everything else goes to the fixture's real Filesystem.
+ };
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ ON_CALL(*mock_icing_filesystem, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ // 4. Create the index again. This should trigger index restoration.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");  // A word that appears in kIpsumText.
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // Only the last document that was added should still be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
+ }
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogFunctionLatency) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);  // presumably makes every timer report 10 ms; the final assertion relies on it.
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));  // Same value as set on the fake clock.
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogDocumentStoreStats) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
+ Eq(10));
+ size_t document_size = put_result_proto.put_document_stats().document_size();  // Checked as a range, not an exact value.
+ EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
+ EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
+ sizeof(DocumentProto::InternalFields)));  // presumably slack for internal fields added on Put - confirm
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexingStats) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")  // Two words: "message" and "body".
+ .Build();
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
+ // No merge should happen.
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(0));
+ // The input document has 2 tokens.
+ EXPECT_THAT(put_result_proto.put_document_stats()
+ .tokenization_stats()
+ .num_tokens_indexed(),
+ Eq(2));
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexMergeLatency) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)  // Same body as document1; only the uri differs.
+ .Build();
+
+ // Create an icing instance with index_merge_size = document1's size.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_index_merge_size(document1.ByteSizeLong());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Putting document2 should trigger an index merge.
+ PutResultProto put_result_proto = icing.Put(document2);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(10));  // Same 10 ms that was set on the fake clock above.
+}
+
+TEST_F(IcingSearchEnginePutTest, PutDocumentIndexFailureDeletion) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Testing has shown that adding ~600,000 terms generated this way will
+ // fill up the hit buffer.
+ std::vector<std::string> terms = GenerateUniqueTerms(600000);
+ std::string content = absl_ports::StrJoin(terms, " ");
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "foo " + content)  // "foo" is the term queried below.
+ .Build();
+ // We failed to add the document to the index fully. This means that we should
+ // reject the document from Icing entirely.
+ ASSERT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+
+ // Make sure that the document isn't searchable.
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+
+ SearchResultProto search_results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), IsEmpty());
+
+ // Make sure that the document isn't retrievable.
+ GetResultProto get_result =
+ icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
+ ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
new file mode 100644
index 0000000..b369d40
--- /dev/null
+++ b/icing/icing-search-engine_schema_test.cc
@@ -0,0 +1,1698 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Return;
+
+// For mocking purposes, tests may supply their own Filesystem, IcingFilesystem, Clock and JniCache.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}  // Only forwards to the base-class constructor.
+};
+
+std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }  // Created in SetUp, deleted in TearDown.
+
+// Fixture for all tests relating to
+// IcingSearchEngine::GetSchema and IcingSearchEngine::SetSchema.
+class IcingSearchEngineSchemaTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+ filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());  // Runs for every tokenization mode.
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());  // Drops whatever the test left under the base dir.
+ }
+
+ const Filesystem* filesystem() const { return &filesystem_; }  // Non-owning; valid for the fixture's lifetime.
+
+ private:
+ Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;  // presumably epoch millis (2019-12-04 UTC) - confirm
+
+std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }  // Same path FailToReadSchema spells out as base_dir + "/schema_dir".
+
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(GetTestBaseDir());  // The only option set here.
+ return icing_options;
+}
+
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {  // "Message" document whose body is "message body".
+ return DocumentBuilder()
+ .SetKey(std::move(name_space), std::move(uri))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)  // Fixed non-zero timestamp, same for every document built here.
+ .Build();
+}
+
+SchemaProto CreateMessageSchema() {  // One type, "Message", with a single required string property, "body".
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+}
+
+ScoringSpecProto GetDefaultScoringSpec() {  // Scoring spec that ranks by DOCUMENT_SCORE.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ return scoring_spec;
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
+ // Create a type config with a circular reference.
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto* body = type->add_properties();  // Despite the variable name, this is the "recipient" property.
+ body->set_property_name("recipient");
+ body->set_schema_type("Person");  // Message.recipient -> Person.
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
+
+ type = schema.add_types();
+ type->set_schema_type("Person");
+
+ body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Message");  // Person.recipient -> Message, which closes the cycle.
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, FailToReadSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Successfully initialize and set a schema
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }  // The first instance goes out of scope here, before the mock-backed one is built.
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+
+ // This fails FileBackedProto::Read() when we try to check the schema we
+ // had previously set
+ ON_CALL(*mock_filesystem,
+ OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = test_icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status().message(),
+ HasSubstr("Unable to open file for read"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, FailToWriteSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails FileBackedProto::Write()
+ ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());  // Initialization is expected to succeed; only SetSchema should fail.
+
+ SetSchemaResultProto set_schema_result_proto =
+ icing.SetSchema(CreateMessageSchema());
+ EXPECT_THAT(set_schema_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
+ EXPECT_THAT(set_schema_result_proto.status().message(),
+ HasSubstr("Unable to open file for write"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleFails) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with properties {"title", "body"}
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 2. Add an email document
+ DocumentProto doc = DocumentBuilder()
+ .SetKey("emails", "email#1")
+ .SetSchema("Email")
+ .AddStringProperty("title", "Hello world.")
+ .AddStringProperty("body", "Goodnight Moon.")
+ .Build();
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
+ }  // First instance ends; the next block opens a new one with the same options.
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 3. Set a schema that deletes email. This should fail.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Message");  // Only "Message" is defined, so "Email" is dropped.
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ // 4. Try to delete by email type. This should succeed because email wasn't
+ // deleted in step 3.
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaIncompatibleForceOverrideSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with properties {"title", "body"}
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // 2. Add an email document
+ DocumentProto doc = DocumentBuilder()
+ .SetKey("emails", "email#1")
+ .SetSchema("Email")
+ .AddStringProperty("title", "Hello world.")
+ .AddStringProperty("body", "Goodnight Moon.")
+ .Build();
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
+ }  // First instance ends; the next block opens a new one with the same options.
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 3. Set a schema that deletes email with force override. This should
+ // succeed and delete the email type.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Message");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());  // true: ignore_errors_and_delete_documents.
+
+ // 4. Try to delete by email type. This should fail because email was
+ // already deleted.
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaUnsetVersionIsZero) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type without setting a version
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0));  // The version read back is 0 when none was set.
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaCompatibleVersionUpdateSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Create schema that adds a new optional property and updates version.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // 3. Force-override SetSchema should succeed and update the version number.
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_fully_compatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleVersionUpdateFails) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Make an incompatible change: 'title' goes OPTIONAL -> REQUIRED.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // 3. SetSchema should fail and the version number should stay at 1.
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Make an incompatible change ('title' goes OPTIONAL -> REQUIRED)
+ // with force override set to true.
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // 3. SetSchema should succeed and the version number should be updated to 2.
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaNoChangeVersionUpdateSucceeds) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 1. Create a schema with an Email type with version 1
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(1);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 2. Create the same schema again, changing only the version (1 -> 2).
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_version(2);
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // 3. SetSchema should succeed and the version number should be updated.
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaDuplicateTypesReturnsAlreadyExists) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with types { "Email", "Message" and "Email" }
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ type = schema.add_types();
+ type->set_schema_type("Message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ *schema.add_types() = schema.types(0);  // Append a copy of the "Email" type.
+
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaDuplicatePropertiesReturnsAlreadyExists) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with an Email type with properties { "title", "body" and
+ // "title" }
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();  // Second property named "title".
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
+}
+
+TEST_F(IcingSearchEngineSchemaTest, SetSchema) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);  // Matches latency_ms checks.
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ auto message_document = CreateMessageDocument("namespace", "uri");
+
+ auto schema_with_message = CreateMessageSchema();
+
+ SchemaProto schema_with_email;
+ SchemaTypeConfigProto* type = schema_with_email.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SchemaProto schema_with_email_and_message = schema_with_email;
+ type = schema_with_email_and_message.add_types();
+ type->set_schema_type("Message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Create an arbitrary invalid schema (a type with an empty name)
+ SchemaProto invalid_schema;
+ SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
+ empty_type->set_schema_type("");
+
+ // Make sure we can't set invalid schemas
+ SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema);
+ EXPECT_THAT(set_schema_result.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ // Can add a document of a set schema
+ set_schema_result = icing.SetSchema(schema_with_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Schema with Email doesn't have Message, so would result in incompatible
+ // data
+ set_schema_result = icing.SetSchema(schema_with_email);
+ EXPECT_THAT(set_schema_result.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ // Can expand the set of schema types and still add the same message
+ // document afterwards
+ set_schema_result = icing.SetSchema(schema_with_email_and_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+ // Can't add a document whose schema isn't set
+ auto photo_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Photo")
+ .AddStringProperty("creator", "icing")
+ .Build();
+ PutResultProto put_result_proto = icing.Put(photo_document);
+ EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(put_result_proto.status().message(),
+ HasSubstr("'Photo' not found"));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
+ schema_with_no_indexed_property.mutable_types(0)
+ ->mutable_properties(0)
+ ->clear_string_indexing_config();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(schema_with_no_indexed_property);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Nothing will be indexed, so Search() won't return anything.
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ SchemaProto schema_with_indexed_property = CreateMessageSchema();
+ // Index restoration should be triggered here because new schema requires more
+ // properties to be indexed.
+ set_schema_result = icing.SetSchema(schema_with_indexed_property);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto person_proto =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill Lundbergh")
+ .Build())
+ .Build();
+
+ // "sender.name" should get assigned property id 0 and subject should get
+ // property id 1.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // document should match a query for 'Bill' in 'sender.name', but not in
+ // 'subject'
+ SearchSpecProto search_spec;
+ search_spec.set_query("sender.name:Bill");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto result;
+ result.mutable_status()->set_code(StatusProto::OK);
+ *result.mutable_results()->Add()->mutable_document() = document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ search_spec.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Now update the schema with index_nested_properties=false. This should
+ // reassign property ids, lead to an index rebuild and ensure that nothing
+ // matches a query for "Bill".
+ SchemaProto no_nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(no_nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // document shouldn't match a query for 'Bill' in either 'sender.name' or
+ // 'subject'
+ search_spec.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 'body' should have a property id of 0 and 'subject' should have a property
+ // id of 1.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only a subject property.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec;
+ search_spec.set_query("subject:tps");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto result;
+ result.mutable_status()->set_code(StatusProto::OK);
+ *result.mutable_results()->Add()->mutable_document() = document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+
+ // Now update the schema to remove the 'body' field. This is backwards
+ // incompatible, but document should be preserved because it doesn't contain a
+ // 'body' field. If the index is correctly rebuilt, then 'subject' will now
+ // have a property id of 0. If not, then the hits in the index will still have
+ // a property id of 1 and therefore it won't be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // We should still be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec.set_query("subject:tps");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+}
+
+TEST_F(
+ IcingSearchEngineSchemaTest,
+ ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 'body' should have a property id of 0 and 'subject' should have a property
+ // id of 1.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only a subject property.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec;
+ search_spec.set_query("subject:tps");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto result;
+ result.mutable_status()->set_code(StatusProto::OK);
+ *result.mutable_results()->Add()->mutable_document() = document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+
+ // Now update the schema to remove 'body' and add a 'to' field. This is
+ // incompatible, but document should be preserved because it doesn't contain a
+ // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to'
+ // will now have property ids of 0 and 1 respectively. If not, then the hits
+ // in the index will still have a property id of 1 and therefore it won't
+ // be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("to")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // We should still be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec.set_query("subject:tps");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+}
+
+TEST_F(IcingSearchEngineSchemaTest,
+ ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto email_schema_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument("Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a person document and an email document nesting it as 'sender'. Both
+ // should be deleted when we remove the 'company' field from the person type.
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "Bill Lundbergh")
+ .AddStringProperty("company", "Initech Corp.")
+ .Build();
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender", person_document)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ // We should be able to retrieve both documents.
+ GetResultProto get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(person_document));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(email_document));
+
+ // Now update the schema to remove the 'company' field. This is backwards
+ // incompatible, *both* documents should be deleted because both fail
+ // validation (they each contain a 'Person' that has a non-existent property).
+ nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ nested_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Both documents should be deleted now, so Get() reports NOT_FOUND for each.
+ get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+// TODO(b/256022027): add unit tests for join incompatible schema change to make
+// sure the joinable cache is rebuilt correctly.
+
+// Checks that changing 'subject' from OPTIONAL to REQUIRED is rejected as
+// incompatible by default, and that force-setting the schema
+// (ignore_errors_and_delete_documents=true) succeeds while deleting only the
+// document that has no subject.
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema_with_optional_subject;
+ auto type = schema_with_optional_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add an OPTIONAL property
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(),
+ ProtoIsOk());
+
+ // Two documents of the same type: one omits the optional 'subject', the
+ // other provides it.
+ DocumentProto email_document_without_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "without_subject")
+ .SetSchema("email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto email_document_with_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "with_subject")
+ .SetSchema("email")
+ .AddStringProperty("subject", "foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk());
+
+ SchemaProto schema_with_required_subject;
+ type = schema_with_required_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add a REQUIRED property
+ property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(schema_with_required_subject);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result_proto;
+ expected_set_schema_result_proto.mutable_status()->set_code(
+ StatusProto::FAILED_PRECONDITION);
+ expected_set_schema_result_proto.mutable_status()->set_message(
+ "Schema is incompatible.");
+ expected_set_schema_result_proto.add_incompatible_schema_types("email");
+
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
+
+ // Force set it
+ set_schema_result =
+ icing.SetSchema(schema_with_required_subject,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ // The forced call is expected to report the same incompatible type, but with
+ // an OK status and no error message.
+ expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result_proto.mutable_status()->clear_message();
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
+
+ // The document that has a subject is still retrievable.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document_with_subject;
+
+ EXPECT_THAT(icing.Get("namespace", "with_subject",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // The document without a subject got deleted because it failed validation
+ // against the new schema
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, without_subject) not found.");
+ expected_get_result_proto.clear_document();
+
+ EXPECT_THAT(icing.Get("namespace", "without_subject",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+// Checks that dropping the "message" type from the schema is rejected by
+// default (reported under deleted_schema_types), and that force-setting the
+// schema deletes the "message" document while the "email" document remains
+// retrievable.
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaDeletesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Start with two property-less types: "email" and "message".
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ type = schema.add_types();
+ type->set_schema_type("message");
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("email")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Clear the schema and only add the "email" type, essentially deleting the
+ // "message" type
+ SchemaProto new_schema;
+ type = new_schema.add_types();
+ type->set_schema_type("email");
+
+ // Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_result;
+ expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ expected_result.mutable_status()->set_message("Schema is incompatible.");
+ expected_result.add_deleted_schema_types("message");
+
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
+
+ // Force set it
+ set_schema_result =
+ icing.SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ // Same deleted type is expected in the result, now with an OK status and no
+ // error message.
+ expected_result.mutable_status()->set_code(StatusProto::OK);
+ expected_result.mutable_status()->clear_message();
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
+
+ // "email" document is still there
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = email_document;
+
+ EXPECT_THAT(icing.Get("namespace", "email_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // "message" document got deleted
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, message_uri) not found.");
+ expected_get_result_proto.clear_document();
+
+ EXPECT_THAT(icing.Get("namespace", "message_uri",
+ GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+}
+
+// GetSchema() on an initialized engine for which no schema was ever set is
+// expected to report NOT_FOUND.
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaNotFound) {
+  IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+  const GetSchemaResultProto get_schema_result = engine.GetSchema();
+  EXPECT_THAT(get_schema_result.status(),
+              ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+// After a successful SetSchema(), GetSchema() is expected to return an OK
+// status together with the exact schema that was set.
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaOk) {
+  IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+  const SchemaProto message_schema = CreateMessageSchema();
+  EXPECT_THAT(engine.SetSchema(message_schema).status(), ProtoIsOk());
+
+  GetSchemaResultProto expected_result;
+  expected_result.mutable_status()->set_code(StatusProto::OK);
+  *expected_result.mutable_schema() = message_schema;
+  EXPECT_THAT(engine.GetSchema(), EqualsProto(expected_result));
+}
+
+// GetSchemaType() before any schema has been set is expected to fail with
+// FAILED_PRECONDITION and a message mentioning "Schema not set".
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeFailedPrecondition) {
+  IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+  const GetSchemaTypeResultProto result =
+      engine.GetSchemaType("nonexistent_schema");
+  const StatusProto& status = result.status();
+  EXPECT_THAT(status, ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+  EXPECT_THAT(status.message(), HasSubstr("Schema not set"));
+}
+
+// After SetSchema(), GetSchemaType() for the first type of the message schema
+// is expected to return OK together with that type's config.
+TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeOk) {
+  IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+  const SchemaProto message_schema = CreateMessageSchema();
+  const auto& message_type = message_schema.types(0);
+  EXPECT_THAT(engine.SetSchema(message_schema).status(), ProtoIsOk());
+
+  GetSchemaTypeResultProto expected_result;
+  expected_result.mutable_status()->set_code(StatusProto::OK);
+  *expected_result.mutable_schema_type_config() = message_type;
+  EXPECT_THAT(engine.GetSchemaType(message_type.schema_type()),
+              EqualsProto(expected_result));
+}
+
+// Checks that when the schema directory is deleted between engine instances
+// and no documents were ever stored, setting a schema that would otherwise be
+// incompatible (OPTIONAL -> REQUIRED) still succeeds.
+TEST_F(IcingSearchEngineSchemaTest,
+ SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
+ SchemaProto incompatible_schema = schema;
+ incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Simulate losing the previously persisted schema.
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
+
+ // Since we don't have any documents yet, we can't detect this edge-case. But
+ // it should be fine since there aren't any documents to be invalidated.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
+}
+
+// Checks that when the schema directory is deleted while a document exists,
+// re-initializing and setting an incompatible schema (OPTIONAL -> REQUIRED)
+// succeeds, after which the document can no longer be fetched with Get() or
+// found with Search().
+TEST_F(IcingSearchEngineSchemaTest, SetSchemaCanDetectPreviousSchemaWasLost) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
+ SchemaProto incompatible_schema = schema;
+ incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ // The same query is issued before and after the schema is lost.
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Can retrieve by namespace/uri
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ ASSERT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Can search for it
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Simulate losing the previously persisted schema.
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
+
+ // Setting the new, different schema will remove incompatible documents
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
+
+ // Can't retrieve by namespace/uri
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
+ expected_get_result_proto.mutable_status()->set_message(
+ "Document (namespace, uri) not found.");
+
+ EXPECT_THAT(
+ icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Can't search for it
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+// Builds a three-level nested schema (EmailCollection -> Email -> Person)
+// whose indexed properties add up to 64 sections, stores a single
+// EmailCollection document and checks that terms taken from the nested
+// string properties all match that document, while an unrelated term matches
+// nothing.
+TEST_F(IcingSearchEngineSchemaTest, IcingShouldWorkFor64Sections) {
+ // Create a schema with 64 sections
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ // Person has 4 sections.
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ // Email has 16 sections.
+ // (4 own string properties + 3 nested Person properties
+ // with 4 sections each.)
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("date")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("receiver")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("cc")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ // EmailCollection has 64 sections.
+ // (4 nested Email properties with 16 sections each.)
+ .SetType("EmailCollection")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email1")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email2")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email3")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("email4")
+ .SetDataTypeDocument(
+ "Email", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Three distinct persons that are reused as sender/receiver/cc of the
+ // emails below.
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-001")
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-002")
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey("namespace", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .AddStringProperty("phoneNumber", "000-000-003")
+ .Build();
+ // Four emails that differ in date, time and in how the persons are assigned.
+ DocumentProto email1 = DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-01")
+ .AddStringProperty("time", "1:00 PM")
+ .AddDocumentProperty("sender", person1)
+ .AddDocumentProperty("receiver", person2)
+ .AddDocumentProperty("cc", person3)
+ .Build();
+ DocumentProto email2 = DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-02")
+ .AddStringProperty("time", "2:00 PM")
+ .AddDocumentProperty("sender", person2)
+ .AddDocumentProperty("receiver", person1)
+ .AddDocumentProperty("cc", person3)
+ .Build();
+ DocumentProto email3 = DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-03")
+ .AddStringProperty("time", "3:00 PM")
+ .AddDocumentProperty("sender", person3)
+ .AddDocumentProperty("receiver", person1)
+ .AddDocumentProperty("cc", person2)
+ .Build();
+ DocumentProto email4 = DocumentBuilder()
+ .SetKey("namespace", "email4")
+ .SetSchema("Email")
+ .AddStringProperty("body", "test body")
+ .AddStringProperty("subject", "test subject")
+ .AddStringProperty("date", "2022-08-04")
+ .AddStringProperty("time", "4:00 PM")
+ .AddDocumentProperty("sender", person3)
+ .AddDocumentProperty("receiver", person2)
+ .AddDocumentProperty("cc", person1)
+ .Build();
+ // The only document that is actually Put(): it nests all four emails.
+ DocumentProto email_collection =
+ DocumentBuilder()
+ .SetKey("namespace", "email_collection")
+ .SetSchema("EmailCollection")
+ .AddDocumentProperty("email1", email1)
+ .AddDocumentProperty("email2", email2)
+ .AddDocumentProperty("email3", email3)
+ .AddDocumentProperty("email4", email4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk());
+
+ // One term per kind of nested string property. NOTE(review): the ':' in the
+ // time term is escaped, presumably because an unescaped ':' has a special
+ // meaning in the query language - confirm.
+ const std::vector<std::string> query_terms = {
+ "first1", "last2", "email3@gmail.com", "000-000-001",
+ "body", "subject", "2022-08-02", "3\\:00"};
+ SearchResultProto expected_document;
+ expected_document.mutable_status()->set_code(StatusProto::OK);
+ *expected_document.mutable_results()->Add()->mutable_document() =
+ email_collection;
+ // Every term must return exactly the email_collection document.
+ for (const std::string& query_term : query_terms) {
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query(query_term);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document));
+ }
+
+ // A term that appears in no property must return an OK, empty result.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
new file mode 100644
index 0000000..9ebd060
--- /dev/null
+++ b/icing/icing-search-engine_search_test.cc
@@ -0,0 +1,4143 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-processor.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/query/query-features.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-log-creator.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/snippet-helpers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::_;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Le;
+using ::testing::Lt;
+using ::testing::Matcher;
+using ::testing::Ne;
+using ::testing::Return;
+using ::testing::SizeIs;
+using ::testing::StrEq;
+using ::testing::UnorderedElementsAre;
+
+// IcingSearchEngine subclass for tests. Its public constructor forwards a
+// caller-supplied Filesystem, IcingFilesystem, Clock and JniCache to the
+// base-class constructor, so tests can inject custom (e.g. mock)
+// implementations.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ // Takes ownership of every dependency and hands it on to IcingSearchEngine.
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
+};
+
+// Returns the directory used as the engine's base dir in these tests: the
+// "icing" subdirectory of the test temp dir.
+std::string GetTestBaseDir() {
+  std::string base_dir = GetTestTempDir();
+  base_dir += "/icing";
+  return base_dir;
+}
+
+// This test is meant to cover all tests relating to IcingSearchEngine::Search
+// and IcingSearchEngine::GetNextPage.
+//
+// The fixture is parameterized on SearchSpecProto::SearchType::Code; tests
+// read the value through GetParam(). Each test runs against a freshly created
+// base directory (see GetTestBaseDir()) that is removed again in TearDown().
+class IcingSearchEngineSearchTest
+    : public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
+ protected:
+  void SetUp() override {
+    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+      // If we've specified using the reverse-JNI method for segmentation (i.e.
+      // not ICU), then we won't have the ICU data file included to set up.
+      // Technically, we could choose to use reverse-JNI for segmentation AND
+      // include an ICU data file, but that seems unlikely and our current BUILD
+      // setup doesn't do this.
+      // File generated via icu_data_file rule in //icing/BUILD.
+      std::string icu_data_file_path =
+          GetTestFilePath("icing/icu.dat");
+      ICING_ASSERT_OK(
+          icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+    }
+    // Fail the test right here if the base directory cannot be created,
+    // instead of letting it surface later as an unrelated engine error.
+    ASSERT_TRUE(
+        filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()));
+  }
+
+  void TearDown() override {
+    // Best-effort cleanup; a failure here is deliberately not treated as a
+    // test failure.
+    filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+  }
+
+  // Non-owning access to the real filesystem used by the fixture.
+  const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+  Filesystem filesystem_;
+};
+
+// Non-zero value so we don't override it to be the current time
+constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+
+// Returns engine options that only set the base directory (to
+// GetTestBaseDir()); every other field keeps its proto default.
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+  IcingSearchEngineOptions options;
+  options.set_base_dir(GetTestBaseDir());
+  return options;
+}
+
+// Builds a "Message" document under the given namespace/uri with the fixed
+// body "message body" and the default creation timestamp.
+DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
+  DocumentBuilder builder;
+  builder.SetKey(std::move(name_space), std::move(uri));
+  builder.SetSchema("Message");
+  builder.AddStringProperty("body", "message body");
+  builder.SetCreationTimestampMs(kDefaultCreationTimestampMs);
+  return builder.Build();
+}
+
+// Builds an "Email" document under the given namespace/uri with the given
+// document score and the given "subject" and "body" string properties.
+DocumentProto CreateEmailDocument(const std::string& name_space,
+                                  const std::string& uri, int score,
+                                  const std::string& subject_content,
+                                  const std::string& body_content) {
+  DocumentBuilder builder;
+  builder.SetKey(name_space, uri);
+  builder.SetSchema("Email");
+  builder.SetScore(score);
+  builder.AddStringProperty("subject", subject_content);
+  builder.AddStringProperty("body", body_content);
+  return builder.Build();
+}
+
+// Returns a schema with a single "Message" type that has one REQUIRED,
+// prefix-matched, plain-tokenized string property named "body".
+SchemaProto CreateMessageSchema() {
+  PropertyConfigBuilder body_property;
+  body_property.SetName("body")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_REQUIRED);
+
+  SchemaTypeConfigBuilder message_type;
+  message_type.SetType("Message").AddProperty(body_property);
+
+  return SchemaBuilder().AddType(message_type).Build();
+}
+
+// Returns a schema with a single "Email" type that has two REQUIRED,
+// prefix-matched, plain-tokenized string properties: "body" and "subject"
+// (added in that order).
+SchemaProto CreateEmailSchema() {
+  PropertyConfigBuilder body_property;
+  body_property.SetName("body")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_REQUIRED);
+
+  PropertyConfigBuilder subject_property;
+  subject_property.SetName("subject")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_REQUIRED);
+
+  SchemaTypeConfigBuilder email_type;
+  email_type.SetType("Email")
+      .AddProperty(body_property)
+      .AddProperty(subject_property);
+
+  return SchemaBuilder().AddType(email_type).Build();
+}
+
+// Returns a schema with two types, added in this order:
+//   - "Person": OPTIONAL string properties "name" and "emailAddress".
+//   - "Email": OPTIONAL string properties "body" and "subject", plus an
+//     OPTIONAL "sender" document property of type "Person" whose nested
+//     properties are indexed.
+// All string properties are prefix-matched and plain-tokenized.
+SchemaProto CreatePersonAndEmailSchema() {
+  // Person type.
+  PropertyConfigBuilder person_name;
+  person_name.SetName("name")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_OPTIONAL);
+
+  PropertyConfigBuilder person_email_address;
+  person_email_address.SetName("emailAddress")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_OPTIONAL);
+
+  SchemaTypeConfigBuilder person_type;
+  person_type.SetType("Person")
+      .AddProperty(person_name)
+      .AddProperty(person_email_address);
+
+  // Email type.
+  PropertyConfigBuilder email_body;
+  email_body.SetName("body")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_OPTIONAL);
+
+  PropertyConfigBuilder email_subject;
+  email_subject.SetName("subject")
+      .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+      .SetCardinality(CARDINALITY_OPTIONAL);
+
+  PropertyConfigBuilder email_sender;
+  email_sender.SetName("sender")
+      .SetDataTypeDocument("Person", /*index_nested_properties=*/true)
+      .SetCardinality(CARDINALITY_OPTIONAL);
+
+  SchemaTypeConfigBuilder email_type;
+  email_type.SetType("Email")
+      .AddProperty(email_body)
+      .AddProperty(email_subject)
+      .AddProperty(email_sender);
+
+  return SchemaBuilder().AddType(person_type).AddType(email_type).Build();
+}
+
+// Returns a scoring spec that ranks results by DOCUMENT_SCORE; all other
+// fields keep their proto defaults.
+ScoringSpecProto GetDefaultScoringSpec() {
+  ScoringSpecProto spec;
+  spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+  return spec;
+}
+
+// Builds a UsageReport for the document identified by (name_space, uri).
+//
+// name_space, uri: key of the document the usage is reported for. Both are
+//     taken by value and moved into the proto.
+// timestamp_ms: value stored in usage_timestamp_ms.
+// usage_type: value stored in usage_type.
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+                              int64_t timestamp_ms,
+                              UsageReport::UsageType usage_type) {
+  UsageReport usage_report;
+  // The parameters are sinks, so move them rather than copying a second time.
+  usage_report.set_document_namespace(std::move(name_space));
+  usage_report.set_document_uri(std::move(uri));
+  usage_report.set_usage_timestamp_ms(timestamp_ms);
+  usage_report.set_usage_type(usage_type);
+  return usage_report;
+}
+
+// Returns the uri of every result document in search_result_proto, in result
+// order. The proto is only read, so it is taken by const reference; existing
+// callers that pass a non-const lvalue keep working unchanged.
+std::vector<std::string> GetUrisFromSearchResults(
+    const SearchResultProto& search_result_proto) {
+  std::vector<std::string> result_uris;
+  result_uris.reserve(search_result_proto.results_size());
+  for (const auto& result : search_result_proto.results()) {
+    result_uris.push_back(result.document().uri());
+  }
+  return result_uris;
+}
+
+// Puts two identical-bodied "Message" documents, then checks that a prefix
+// query for "message" returns both of them with a snippet only on the first
+// result (num_to_snippet is 1), and that a query for an absent term returns
+// an OK, empty result.
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsValidResults) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("message");
+  search_spec.set_search_type(GetParam());
+
+  // Snippet at most one match per property, and only for the first result.
+  ResultSpecProto result_spec;
+  result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+  result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+  result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
+
+  SearchResultProto results =
+      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  // ASSERT (not EXPECT): results(0) and results(1) are indexed below, which
+  // would be out of range if fewer than two results came back.
+  ASSERT_THAT(results.results(), SizeIs(2));
+
+  // NOTE(review): document_two is expected first although both documents have
+  // the same score; presumably ties favor the more recently added document -
+  // confirm.
+  const DocumentProto& document = results.results(0).document();
+  EXPECT_THAT(document, EqualsProto(document_two));
+
+  const SnippetProto& snippet = results.results(0).snippet();
+  // ASSERT (not EXPECT): entries(0) is indexed below.
+  ASSERT_THAT(snippet.entries(), SizeIs(1));
+  EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+  std::string_view content =
+      GetString(&document, snippet.entries(0).property_name());
+  EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+              ElementsAre("message body"));
+  EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
+
+  // The second result is beyond num_to_snippet and must carry no snippet.
+  EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+  EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
+
+  // A term that matches nothing yields an OK status and no results.
+  search_spec.set_query("foo");
+
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  SearchResultProto actual_results =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+                                  expected_search_result_proto));
+}
+
+// When ranking by DOCUMENT_SCORE, results are ordered by descending document
+// score and each result's score field carries the document's score.
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ // Rank by DOCUMENT_SCORE and ensure that the score field is populated with
+ // document score.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ // Fatal on purpose: results(0) and results(1) are indexed below.
+ ASSERT_THAT(results.results(), SizeIs(2));
+
+ // document_one has the higher score (93 > 15), so it ranks first.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).score(), Eq(93));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).score(), Eq(15));
+}
+
+// When ranking by CREATION_TIMESTAMP, results are ordered by descending
+// creation timestamp and each result's score field carries that timestamp.
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ // Rank by CREATION_TS and ensure that the score field is populated with
+ // creation ts.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ // Fatal on purpose: results(0) and results(1) are indexed below.
+ ASSERT_THAT(results.results(), SizeIs(2));
+
+ // document_two has the later timestamp (12000 > 10000), so it ranks first.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(0).score(), Eq(12000));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(1).score(), Eq(10000));
+}
+
+// With a page size of 1, the first page holds only the document that was
+// added last. The fake clock's timer is set to 1000ms, and every latency
+// field of the query stats is expected to report exactly that value.
+TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult) {
+ auto clock = std::make_unique<FakeClock>();
+ clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine engine(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(clock), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto older_doc = CreateMessageDocument("namespace", "uri1");
+ ASSERT_THAT(engine.Put(older_doc).status(), ProtoIsOk());
+
+ DocumentProto newer_doc = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(engine.Put(newer_doc).status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ ResultSpecProto paging_spec;
+ paging_spec.set_num_per_page(1);
+
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(), paging_spec);
+ EXPECT_THAT(actual.status(), ProtoIsOk());
+
+ // Every timed stage reports the fake clock's elapsed time.
+ const auto& stats = actual.query_stats();
+ EXPECT_THAT(stats.latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.parse_query_latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.ranking_latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.document_retrieval_latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.lock_acquisition_latency_ms(), Eq(1000));
+
+ // Only the newer document is expected on this page. The token is a random
+ // number, so it is copied from the actual result rather than verified.
+ SearchResultProto expected;
+ expected.mutable_status()->set_code(StatusProto::OK);
+ *expected.mutable_results()->Add()->mutable_document() = newer_doc;
+ expected.set_next_page_token(actual.next_page_token());
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+}
+
+// A page size of zero is accepted: the search succeeds with an OK status and
+// returns no results.
+TEST_P(IcingSearchEngineSearchTest, SearchZeroResultLimitReturnsEmptyResults) {
+ IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ ResultSpecProto paging_spec;
+ paging_spec.set_num_per_page(0);
+
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(), paging_spec);
+
+ // Expect nothing but an OK status.
+ SearchResultProto expected;
+ expected.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNegativeResultLimitReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(-5);
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(
+ StatusProto::INVALID_ARGUMENT);
+ expected_search_result_proto.mutable_status()->set_message(
+ "ResultSpecProto.num_per_page cannot be negative.");
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+// The per-page byte threshold must be positive: both a negative value and
+// zero are rejected with INVALID_ARGUMENT.
+TEST_P(IcingSearchEngineSearchTest,
+ SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) {
+ IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ ResultSpecProto paging_spec;
+
+ // Negative threshold.
+ paging_spec.set_num_total_bytes_per_page_threshold(-1);
+ SearchResultProto negative_threshold_result =
+ engine.Search(query_spec, GetDefaultScoringSpec(), paging_spec);
+ EXPECT_THAT(negative_threshold_result.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+ // Zero threshold.
+ paging_spec.set_num_total_bytes_per_page_threshold(0);
+ SearchResultProto zero_threshold_result =
+ engine.Search(query_spec, GetDefaultScoringSpec(), paging_spec);
+ EXPECT_THAT(zero_threshold_result.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// Exercises three successive engine instances that share the same options:
+// the first sets the schema, the second adds a document, and the third
+// searches. Each instance is destroyed before the next is created, so the
+// later ones must work from whatever the earlier ones left on disk.
+TEST_P(IcingSearchEngineSearchTest, SearchWithPersistenceReturnsValidResults) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+
+ {
+ // Instance 1: install the schema. It is persisted when the engine goes out
+ // of scope.
+ IcingSearchEngine engine(options, GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ // Instance 2: the schema and section_manager must be initialized properly
+ // from the pre-existing file for this Put to succeed. The index and
+ // document store are persisted when the engine goes out of scope.
+ IcingSearchEngine engine(options, GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+ DocumentProto stored_doc = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(engine.Put(stored_doc).status(), ProtoIsOk());
+ }
+
+ {
+ // Instance 3: the index must come back up without problems and answer
+ // queries for the content added by instance 2.
+ IcingSearchEngine engine(options, GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_hit;
+ expected_hit.mutable_status()->set_code(StatusProto::OK);
+ *expected_hit.mutable_results()->Add()->mutable_document() =
+ CreateMessageDocument("namespace", "uri");
+
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected_hit));
+
+ // A term that matches nothing returns an OK status with no results.
+ query_spec.set_query("foo");
+
+ SearchResultProto expected_miss;
+ expected_miss.mutable_status()->set_code(StatusProto::OK);
+ actual = engine.Search(query_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected_miss));
+ }
+}
+
+// Searching an engine that has a schema but no documents returns an OK, empty
+// result. With the fake clock's timer set to 1000ms, the stages that ran
+// report 1000ms while ranking and document retrieval report 0.
+TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnEmpty) {
+ auto clock = std::make_unique<FakeClock>();
+ clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine engine(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(clock), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual.status(), ProtoIsOk());
+
+ const auto& stats = actual.query_stats();
+ EXPECT_THAT(stats.latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.parse_query_latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.scoring_latency_ms(), Eq(1000));
+ EXPECT_THAT(stats.ranking_latency_ms(), Eq(0));
+ EXPECT_THAT(stats.document_retrieval_latency_ms(), Eq(0));
+ EXPECT_THAT(stats.lock_acquisition_latency_ms(), Eq(1000));
+
+ // Empty result, no next-page token.
+ SearchResultProto expected;
+ expected.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+}
+
+// Five matching documents with a page size of 2 are returned as pages of
+// 2, 2 and 1 results, newest document first. The last page carries no
+// next-page token, and fetching once more returns no results.
+TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnMultiplePages) {
+ IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Build the 5 documents, then insert them in uri order.
+ DocumentProto doc_a = CreateMessageDocument("namespace", "uri1");
+ DocumentProto doc_b = CreateMessageDocument("namespace", "uri2");
+ DocumentProto doc_c = CreateMessageDocument("namespace", "uri3");
+ DocumentProto doc_d = CreateMessageDocument("namespace", "uri4");
+ DocumentProto doc_e = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(engine.Put(doc_a).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_b).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_c).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_d).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_e).status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ ResultSpecProto paging_spec;
+ paging_spec.set_num_per_page(2);
+
+ // Page 1: the two newest documents.
+ SearchResultProto expected;
+ expected.mutable_status()->set_code(StatusProto::OK);
+ *expected.mutable_results()->Add()->mutable_document() = doc_e;
+ *expected.mutable_results()->Add()->mutable_document() = doc_d;
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(), paging_spec);
+ EXPECT_THAT(actual.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t page_token = actual.next_page_token();
+ // The token is a random number, so it is copied rather than verified.
+ expected.set_next_page_token(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+
+ // Page 2: the next two documents, same token.
+ expected.clear_results();
+ *expected.mutable_results()->Add()->mutable_document() = doc_c;
+ *expected.mutable_results()->Add()->mutable_document() = doc_b;
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+
+ // Page 3: the single remaining document. Nothing is left afterwards, so no
+ // next-page token should be returned.
+ expected.clear_results();
+ *expected.mutable_results()->Add()->mutable_document() = doc_a;
+ expected.clear_next_page_token();
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+
+ // Past the end: no more results.
+ expected.clear_results();
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+}
+
+// Same paging scenario as the default-scoring case, but with the ranking
+// strategy set to NONE: five documents with a page size of 2 come back as
+// pages of 2, 2 and 1 results, newest document first.
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithNoScoringShouldReturnMultiplePages) {
+ IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Build the 5 documents, then insert them in uri order.
+ DocumentProto doc_a = CreateMessageDocument("namespace", "uri1");
+ DocumentProto doc_b = CreateMessageDocument("namespace", "uri2");
+ DocumentProto doc_c = CreateMessageDocument("namespace", "uri3");
+ DocumentProto doc_d = CreateMessageDocument("namespace", "uri4");
+ DocumentProto doc_e = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(engine.Put(doc_a).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_b).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_c).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_d).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(doc_e).status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ ScoringSpecProto no_ranking_spec;
+ no_ranking_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+
+ ResultSpecProto paging_spec;
+ paging_spec.set_num_per_page(2);
+
+ // Page 1: the two newest documents.
+ SearchResultProto expected;
+ expected.mutable_status()->set_code(StatusProto::OK);
+ *expected.mutable_results()->Add()->mutable_document() = doc_e;
+ *expected.mutable_results()->Add()->mutable_document() = doc_d;
+ SearchResultProto actual =
+ engine.Search(query_spec, no_ranking_spec, paging_spec);
+ EXPECT_THAT(actual.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t page_token = actual.next_page_token();
+ // The token is a random number, so it is copied rather than verified.
+ expected.set_next_page_token(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+
+ // Page 2: the next two documents, same token.
+ expected.clear_results();
+ *expected.mutable_results()->Add()->mutable_document() = doc_c;
+ *expected.mutable_results()->Add()->mutable_document() = doc_b;
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+
+ // Page 3: the single remaining document. Nothing is left afterwards, so no
+ // next-page token should be returned.
+ expected.clear_results();
+ *expected.mutable_results()->Add()->mutable_document() = doc_a;
+ expected.clear_next_page_token();
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+
+ // Past the end: no more results.
+ expected.clear_results();
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+}
+
+// A search spec that enables a feature name the engine does not recognize is
+// rejected with INVALID_ARGUMENT.
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithUnknownEnabledFeatureShouldReturnError) {
+ IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+ // The unrecognized feature is what should trigger the error.
+ query_spec.add_enabled_features("BAD_FEATURE");
+
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// Pages through five matching documents (page size 2) with num_to_snippet set
+// to 3. Snippets are expected on the first three results overall: both results
+// of page 1 and the first result of page 2. Remaining results have none.
+TEST_P(IcingSearchEngineSearchTest, ShouldReturnMultiplePagesWithSnippets) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ const DocumentProto& document_result_1 = search_result.results(0).document();
+ EXPECT_THAT(document_result_1, EqualsProto(document5));
+ const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
+ // Fatal on purpose: entries(0) is indexed below.
+ ASSERT_THAT(snippet_result_1.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &document_result_1, snippet_result_1.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
+ ElementsAre("message"));
+
+ const DocumentProto& document_result_2 = search_result.results(1).document();
+ EXPECT_THAT(document_result_2, EqualsProto(document4));
+ const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
+ // Fatal on purpose: entries(0) is indexed below.
+ ASSERT_THAT(snippet_result_2.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_2,
+ snippet_result_2.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
+ ElementsAre("message"));
+
+ // Second page, 2 result with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ const DocumentProto& document_result_3 = search_result.results(0).document();
+ EXPECT_THAT(document_result_3, EqualsProto(document3));
+ const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
+ // Fatal on purpose: entries(0) is indexed below.
+ ASSERT_THAT(snippet_result_3.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_3,
+ snippet_result_3.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
+ ElementsAre("message"));
+
+ // The snippet budget (3) is used up, so this result has no snippet.
+ EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
+ EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
+ EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
+}
+
+// After the first page has been served, invalidating its next-page token
+// makes a subsequent GetNextPage call return an OK result with no documents
+// and no token, even though one document was still unfetched.
+TEST_P(IcingSearchEngineSearchTest, ShouldInvalidateNextPageToken) {
+ IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto older_doc = CreateMessageDocument("namespace", "uri1");
+ DocumentProto newer_doc = CreateMessageDocument("namespace", "uri2");
+ ASSERT_THAT(engine.Put(older_doc).status(), ProtoIsOk());
+ ASSERT_THAT(engine.Put(newer_doc).status(), ProtoIsOk());
+
+ SearchSpecProto query_spec;
+ query_spec.set_query("message");
+ query_spec.set_term_match_type(TermMatchType::PREFIX);
+ query_spec.set_search_type(GetParam());
+
+ ResultSpecProto paging_spec;
+ paging_spec.set_num_per_page(1);
+
+ // First page: one result, the newer document.
+ SearchResultProto expected;
+ expected.mutable_status()->set_code(StatusProto::OK);
+ *expected.mutable_results()->Add()->mutable_document() = newer_doc;
+ SearchResultProto actual =
+ engine.Search(query_spec, GetDefaultScoringSpec(), paging_spec);
+ EXPECT_THAT(actual.next_page_token(), Gt(kInvalidNextPageToken));
+ uint64_t page_token = actual.next_page_token();
+ // The token is a random number, so it is copied rather than verified.
+ expected.set_next_page_token(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+ // At this point the older document has not been fetched yet.
+
+ // Drop the token.
+ engine.InvalidateNextPageToken(page_token);
+
+ // Fetching the second page now yields nothing because the token is gone.
+ expected.clear_results();
+ expected.clear_next_page_token();
+ actual = engine.GetNextPage(page_token);
+ EXPECT_THAT(actual, EqualsSearchResultIgnoreStatsAndScores(expected));
+}
+
+// A document created at t=100 with a ttl of 500 is still returned by a search
+// when the fake clock's system time is 400, i.e. before creation time + ttl.
+TEST_P(IcingSearchEngineSearchTest, SearchIncludesDocumentsBeforeTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ // Time just has to be less than the document's creation timestamp (100) + the
+ // document's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(400);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ // Setup steps are fatal: the search below is meaningless if any of them
+ // fails.
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document is returned as part of search results
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+// A document created at t=100 with a ttl of 500 is not returned by a search
+// when the fake clock's system time is 700, i.e. after creation time + ttl.
+// The Put itself is still expected to succeed.
+TEST_P(IcingSearchEngineSearchTest, SearchDoesntIncludeDocumentsPastTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ // Expect an OK status with no results.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // Time just has to be greater than the document's creation timestamp (100) +
+ // the document's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(700);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+
+ // Setup steps are fatal: the search below is meaningless if any of them
+ // fails.
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Check that the document is not returned as part of search results
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+// Checks that a stored document stays searchable across a compatible schema
+// change. The "message" type starts with an unindexed "body" property, so a
+// term query finds nothing while a schema-type filter alone does. The schema
+// is then replaced with one that lists a new "email" type before "message"
+// and gives "body" a string indexing config; afterwards the term query plus
+// type filter must return the original document.
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWorksAfterSchemaTypesCompatiblyModified) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ // No string_indexing_config is set here, so "body" starts out unindexed.
+ auto property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .AddStringProperty("body", "foo")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+
+ // Make sure we can search for message document
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ // The message isn't indexed, so we get nothing
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // With just the schema type filter, we can search for the message
+ // NOTE(review): Clear() also resets the search type set from GetParam()
+ // above, and it is not set again, so this search presumably runs with the
+ // proto's default search type for every test parameter - confirm whether
+ // that is intended.
+ search_spec.Clear();
+ search_spec.add_schema_type_filters("message");
+
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message_document;
+
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
+ // will force a change in the DocumentStore's cached SchemaTypeIds
+ schema.clear_types();
+ type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ // Adding a new indexed property will require reindexing
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // NOTE(review): as above, the search type from GetParam() is not restored
+ // after this Clear() - confirm whether that is intended.
+ search_spec.Clear();
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.add_schema_type_filters("message");
+
+ // We can still search for the message document
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByDocumentScore) {  // DOCUMENT_SCORE ranking must return the highest-scored document first.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents with the same creation timestamp whose document
+ // scores satisfy: document1 (1) < document2 (2) < document3 (3)
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents as 2, 3, 1, which is neither their
+ // score order nor its reverse, so insertion order cannot explain the result
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Prefix query "m" will match the "message*" body of all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending document score order: 3, 2, 1
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchShouldAllowNoScoring) {  // RankingStrategy::NONE is accepted and yields reverse insertion order.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents whose scores and creation timestamps both satisfy:
+ // document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(1571111111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(1572222222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(1573333333333)
+ .Build();
+
+ // Intentionally inserts the documents as 3, 1, 2, which differs from both
+ // their score order and its reverse
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Prefix query "m" will match the "message*" body of all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+
+ // With RankingStrategy::NONE the results should not be ranked by score but
+ // returned in reverse insertion order (2, 1, 3), as built above.
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldBeRankedByCreationTimestamp) {  // CREATION_TIMESTAMP ranking must return the newest document first.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 documents (no explicit score) whose creation timestamps
+ // satisfy: document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(1571111111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(1572222222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(1573333333333)
+ .Build();
+
+ // Intentionally inserts the documents as 3, 1, 2, which differs from both
+ // their creation timestamp order and its reverse
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Prefix query "m" will match the "message*" body of all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending creation timestamp order: 3, 2, 1
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByUsageCount) {  // USAGE_TYPE1_COUNT ranking must return the most-reported document first.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents that differ only in their uri and body
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents as 3, 1, 2 to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order (which would be 2, 1, 3).
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc3 twice and doc2 once; doc1 gets no report. The order
+ // will be doc3 > doc2 > doc1 when ranked by USAGE_TYPE1_COUNT.
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+
+ // Prefix query "m" will match the "message*" body of all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending USAGE_TYPE1_COUNT order: 3, 2, 1
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;  // default-constructed; rank_by is set explicitly below
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageCounts) {  // USAGE_TYPE1_COUNT ranking with no usage reports yields reverse insertion order.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents that differ only in their uri and body
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());  // inserted as 1, 2, 3
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Prefix query "m" will match the "message*" body of all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order: 3, 2, 1.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;  // default-constructed; rank_by is set explicitly below
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchResultShouldBeRankedByUsageTimestamp) {  // USAGE_TYPE1_LAST_USED_TIMESTAMP ranking must return the most recently used document first.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents that differ only in their uri and body
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents as 3, 1, 2 to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order (which would be 2, 1, 3).
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc2 at 1000 ms and doc3 at 5000 ms; doc1 has none. The
+ // order will be doc3 > doc2 > doc1 by USAGE_TYPE1_LAST_USED_TIMESTAMP.
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+
+ // Prefix query "m" will match the "message*" body of all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ search_spec.set_search_type(GetParam());
+
+ // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;  // default-constructed; rank_by is set explicitly below
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespace) {  // RELEVANCE_SCORE ranking of an OR query over a single namespace.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index 8 documents (uri0..uri7) in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending relevance score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // doc5 and doc7 each contain "coffee" in both of their text fields, but
+ // doc5 has one more occurrence, so it ranks first. Documents matching only
+ // "food" are ranked lower as the term "food" is present in more documents
+ // of this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 1 time (plus "foods" once; NOTE(review): confirm "foods" is not an exact match)
+ "namespace1/uri2", // 'food' 1 time
+ "namespace1/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespaceAdvanced) {  // Same corpus and query, ranked through an advanced scoring expression.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index 8 documents (uri0..uri7) in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1");
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order; "x * 2 + 1" is increasing in x, so this is also descending relevance order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // doc5 and doc7 each contain "coffee" in both of their text fields, but
+ // doc5 has one more occurrence, so it ranks first. Documents matching only
+ // "food" are ranked lower as the term "food" is present in more documents
+ // of this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 1 time (plus "foods" once; NOTE(review): confirm "foods" is not an exact match)
+ "namespace1/uri2", // 'food' 1 time
+ "namespace1/uri6")); // 'food' 1 time
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ Bm25fRelevanceScoringOneNamespaceNotOperator) {  // RELEVANCE_SCORE ranking when the query excludes a term with '-'.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index 8 documents (uri0..uri7) in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
+ "thin-crust pizza. good and fast. nice coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee -starbucks");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order; uri7 contains "coffee" but also "starbucks", so it is not expected
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 3 times, 'starbucks' 0 times
+ "namespace1/uri3")); // 'coffee' 1 time, 'starbucks' 0 times
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ Bm25fRelevanceScoringOneNamespaceSectionRestrict) {  // RELEVANCE_SCORE ranking when each query term is restricted to one section.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index 8 documents (uri0..uri7) in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document =
+ CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18,
+ "peets coffee, best coffee",
+ "espresso. decaf. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri7", /*score=*/4, "starbucks",
+ "habit. birthday rewards. good coffee. brewed coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("subject:coffee OR body:food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // "coffee" is only counted in the subject section and "food" only in the
+ // body section, so uri7 (whose "coffee" is all in the body) is not
+ // expected. Documents with "food" are ranked lower as the term "food" is
+ // commonly present in this corpus, and thus, has a lower IDF.
+ EXPECT_THAT(
+ GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
+ "namespace1/uri1", // 'food' 2 times in section body
+ "namespace1/uri4", // 'food' 1 time in section body ("foods" is in the subject)
+ "namespace1/uri2", // 'food' 1 time in section body
+ "namespace1/uri6")); // 'food' 1 time in section body
+}
+
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringTwoNamespaces) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace1".
+ DocumentProto document = CreateEmailDocument(
+ "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+ "starbucks coffee",
+ "habit. birthday rewards. good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Create and index documents in namespace "namespace2".
+ document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+ "sushi belmont",
+ "fresh fish. inexpensive. good sushi.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+ "indian food. buffet. spicy food. kadai chicken.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+ "panda express",
+ "chinese food. cheap. inexpensive. kung pao.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+ "speederia pizza",
+ "thin-crust pizza. good and fast.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+ "whole foods",
+ "salads. pizza. organic food. expensive.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+ "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument(
+ "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+ "bulk. cheap whole beans. frozen fish. food samples.");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+ "starbucks coffee", "good coffee");
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("coffee OR food");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+ ResultSpecProto result_spec_proto;
+ result_spec_proto.set_num_per_page(16);
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec_proto);
+
+ // Result should be in descending score order
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+ // The two corpora have the same documents except for document 7, which in
+ // "namespace2" is much shorter than the average document length, so it is
+ // boosted.
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+ ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
+ "namespace1/uri5", // 'coffee' 3 times
+ "namespace2/uri5", // 'coffee' 3 times
+ "namespace1/uri7", // 'coffee' 2 times
+ "namespace1/uri1", // 'food' 2 times
+ "namespace2/uri1", // 'food' 2 times
+ "namespace1/uri4", // 'food' 2 times
+ "namespace2/uri4", // 'food' 2 times
+ "namespace1/uri2", // 'food' 1 time
+ "namespace2/uri2", // 'food' 1 time
+ "namespace1/uri6", // 'food' 1 time
+ "namespace2/uri6")); // 'food' 1 time
+}
+
+// Indexes the same set of emails into "namespace1" and "namespace2", runs a
+// RELEVANCE_SCORE-ranked query restricted to "namespace2" via a namespace
+// filter, and checks that only "namespace2" documents come back in the
+// expected order.
+TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringWithNamespaceFilter) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  // One row per email to index. Rows are inserted in the order listed: all of
+  // "namespace1" first, then all of "namespace2".
+  struct EmailFixture {
+    const char* name_space;
+    const char* uri;
+    int score;
+    const char* subject;
+    const char* body;
+  };
+  const EmailFixture kEmails[] = {
+      // Documents in namespace "namespace1".
+      {"namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+       "fresh fish. inexpensive. good sushi."},
+      {"namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+       "indian food. buffet. spicy food. kadai chicken."},
+      {"namespace1", "namespace1/uri2", /*score=*/4, "panda express",
+       "chinese food. cheap. inexpensive. kung pao."},
+      {"namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
+       "thin-crust pizza. good and fast."},
+      {"namespace1", "namespace1/uri4", /*score=*/8, "whole foods",
+       "salads. pizza. organic food. expensive."},
+      {"namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+       "espresso. decaf. brewed coffee. whole beans. excellent coffee."},
+      {"namespace1", "namespace1/uri6", /*score=*/4, "costco",
+       "bulk. cheap whole beans. frozen fish. food samples."},
+      {"namespace1", "namespace1/uri7", /*score=*/4, "starbucks coffee",
+       "habit. birthday rewards. good coffee"},
+      // Documents in namespace "namespace2". Identical to "namespace1" except
+      // for uri7, whose body is much shorter.
+      {"namespace2", "namespace2/uri0", /*score=*/10, "sushi belmont",
+       "fresh fish. inexpensive. good sushi."},
+      {"namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+       "indian food. buffet. spicy food. kadai chicken."},
+      {"namespace2", "namespace2/uri2", /*score=*/4, "panda express",
+       "chinese food. cheap. inexpensive. kung pao."},
+      {"namespace2", "namespace2/uri3", /*score=*/23, "speederia pizza",
+       "thin-crust pizza. good and fast."},
+      {"namespace2", "namespace2/uri4", /*score=*/8, "whole foods",
+       "salads. pizza. organic food. expensive."},
+      {"namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+       "espresso. decaf. brewed coffee. whole beans. excellent coffee."},
+      {"namespace2", "namespace2/uri6", /*score=*/4, "costco",
+       "bulk. cheap whole beans. frozen fish. food samples."},
+      {"namespace2", "namespace2/uri7", /*score=*/4, "starbucks coffee",
+       "good coffee"},
+  };
+  for (const EmailFixture& email : kEmails) {
+    DocumentProto document =
+        CreateEmailDocument(email.name_space, email.uri, email.score,
+                            email.subject, email.body);
+    ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  }
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("coffee OR food");
+  search_spec.set_search_type(GetParam());
+  // Now query only corpus 2
+  search_spec.add_namespace_filters("namespace2");
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  // A single search is enough; the previous version issued the identical
+  // query twice and discarded the first result.
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+  // Result from namespace "namespace2" should be in descending score order
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  // Both doc5 and doc7 have "coffee" in name and text sections.
+  // Even though doc5 has more matches in the text section, doc7's length is
+  // much shorter than the average corpus's length, so it's being boosted.
+  // Documents with "food" are ranked lower as the term "food" is commonly
+  // present in this corpus, and thus, has a lower IDF.
+  EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+              ElementsAre("namespace2/uri7",    // 'coffee' 2 times, short doc
+                          "namespace2/uri5",    // 'coffee' 3 times
+                          "namespace2/uri1",    // 'food' 2 times
+                          "namespace2/uri4",    // 'food' 2 times
+                          "namespace2/uri2",    // 'food' 1 time
+                          "namespace2/uri6"));  // 'food' 1 time
+}
+
+// Ranks by USAGE_TYPE1_LAST_USED_TIMESTAMP when no document has a usage
+// report and expects the results in reverse insertion order.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Builds a "Message" document in namespace "namespace" with the given uri
+  // and body text.
+  auto make_message = [&](const char* uri, const char* body) {
+    return DocumentBuilder()
+        .SetKey("namespace", uri)
+        .SetSchema("Message")
+        .AddStringProperty("body", body)
+        .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+        .Build();
+  };
+
+  // Three test documents, indexed in the order first, second, third.
+  DocumentProto first_message = make_message("uri/1", "message1");
+  DocumentProto second_message = make_message("uri/2", "message2");
+  DocumentProto third_message = make_message("uri/3", "message3");
+  for (const DocumentProto* message :
+       {&first_message, &second_message, &third_message}) {
+    ASSERT_THAT(icing.Put(*message).status(), ProtoIsOk());
+  }
+
+  // The prefix query "m" matches all three documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec;
+  scoring_spec.set_rank_by(
+      ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+
+  // No usage reports exist, so the expected order is the default one: most
+  // recently inserted document first.
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  for (const DocumentProto* message :
+       {&third_message, &second_message, &first_message}) {
+    *expected_search_result_proto.mutable_results()
+         ->Add()
+         ->mutable_document() = *message;
+  }
+
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Ranks by DOCUMENT_SCORE with ascending order and expects the
+// lowest-scored document first.
+TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedAscendingly) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Three documents whose document scores are strictly increasing:
+  // low_score_doc (1) < mid_score_doc (2) < high_score_doc (3).
+  DocumentProto low_score_doc =
+      DocumentBuilder()
+          .SetKey("namespace", "uri/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message1")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto mid_score_doc =
+      DocumentBuilder()
+          .SetKey("namespace", "uri/2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message2")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto high_score_doc =
+      DocumentBuilder()
+          .SetKey("namespace", "uri/3")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message3")
+          .SetScore(3)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  // Insert in an order that deliberately differs from the score order.
+  for (const DocumentProto* doc :
+       {&mid_score_doc, &high_score_doc, &low_score_doc}) {
+    ASSERT_THAT(icing.Put(*doc).status(), ProtoIsOk());
+  }
+
+  // The prefix query "m" matches all three documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+  scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
+
+  // Expected results are listed in ascending score order.
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  for (const DocumentProto* doc :
+       {&low_score_doc, &mid_score_doc, &high_score_doc}) {
+    *expected_search_result_proto.mutable_results()
+         ->Add()
+         ->mutable_document() = *doc;
+  }
+
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// A NAMESPACE result grouping spec that lists "namespace1" more than once is
+// expected to make Search return INVALID_ARGUMENT.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingDuplicateNamespaceShouldReturnError) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Two documents in different namespaces; the second has the higher
+  // document score.
+  DocumentProto namespace1_doc =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message1")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto namespace2_doc =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri/2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message2")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  ASSERT_THAT(icing.Put(namespace1_doc).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(namespace2_doc).status(), ProtoIsOk());
+
+  // The prefix query "m" matches both documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // "namespace1" appears twice in the first group and once more in the
+  // second group. This should result in an error.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+
+  ResultSpecProto::ResultGrouping* first_group =
+      result_spec.add_result_groupings();
+  first_group->set_max_results(1);
+  first_group->add_entry_groupings()->set_namespace_("namespace1");
+  first_group->add_entry_groupings()->set_namespace_("namespace2");
+  first_group->add_entry_groupings()->set_namespace_("namespace1");
+
+  ResultSpecProto::ResultGrouping* second_group =
+      result_spec.add_result_groupings();
+  second_group->set_max_results(1);
+  second_group->add_entry_groupings()->set_namespace_("namespace1");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  EXPECT_THAT(search_result_proto.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// A SCHEMA_TYPE result grouping spec that lists "Message" in two groups is
+// expected to make Search return INVALID_ARGUMENT.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingDuplicateSchemaShouldReturnError) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Two "Message" documents; the second has the higher document score.
+  DocumentProto lower_scored_doc =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message1")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto higher_scored_doc =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri/2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message2")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  ASSERT_THAT(icing.Put(lower_scored_doc).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(higher_scored_doc).status(), ProtoIsOk());
+
+  // The prefix query "m" matches both documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // "Message" is listed in both groups. This should result in an error.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+
+  ResultSpecProto::ResultGrouping* first_group =
+      result_spec.add_result_groupings();
+  first_group->set_max_results(1);
+  first_group->add_entry_groupings()->set_schema("Message");
+  first_group->add_entry_groupings()->set_schema("nonexistentMessage");
+
+  ResultSpecProto::ResultGrouping* second_group =
+      result_spec.add_result_groupings();
+  second_group->set_max_results(1);
+  second_group->add_entry_groupings()->set_schema("Message");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  EXPECT_THAT(search_result_proto.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// A NAMESPACE_AND_SCHEMA_TYPE result grouping spec that repeats the
+// ("namespace1", "Message") pair is expected to make Search return
+// INVALID_ARGUMENT.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingDuplicateNamespaceAndSchemaSchemaShouldReturnError) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Two "Message" documents in different namespaces; the second has the
+  // higher document score.
+  DocumentProto namespace1_doc =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message1")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto namespace2_doc =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri/2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message2")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  ASSERT_THAT(icing.Put(namespace1_doc).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(namespace2_doc).status(), ProtoIsOk());
+
+  // The prefix query "m" matches both documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // Appends a (namespace, schema) entry to the given result grouping.
+  auto add_entry = [](ResultSpecProto::ResultGrouping* group,
+                      const char* name_space, const char* schema) {
+    ResultSpecProto::ResultGrouping::Entry* entry =
+        group->add_entry_groupings();
+    entry->set_namespace_(name_space);
+    entry->set_schema(schema);
+  };
+
+  // "namespace1xMessage" is listed twice in the first group and once more in
+  // the second group. This should result in an error.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+
+  ResultSpecProto::ResultGrouping* first_group =
+      result_spec.add_result_groupings();
+  first_group->set_max_results(1);
+  add_entry(first_group, "namespace1", "Message");
+  add_entry(first_group, "namespace2", "Message");
+  add_entry(first_group, "namespace1", "Message");
+
+  ResultSpecProto::ResultGrouping* second_group =
+      result_spec.add_result_groupings();
+  second_group->set_max_results(1);
+  add_entry(second_group, "namespace1", "Message");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  EXPECT_THAT(search_result_proto.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// A result grouping whose max_results is zero or negative is expected to make
+// Search return INVALID_ARGUMENT. Both cases are exercised with a fresh
+// Search call.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingNonPositiveMaxResultsShouldReturnError) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Creates 2 documents and ensures the relationship in terms of document
+  // score is: document1 < document2
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message1")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri/2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message2")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+  // "m" will match all 2 documents
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // Specify zero results. This should result in an error.
+  ResultSpecProto result_spec;
+  ResultSpecProto::ResultGrouping* result_grouping =
+      result_spec.add_result_groupings();
+  result_grouping->set_max_results(0);
+  ResultSpecProto::ResultGrouping::Entry* entry =
+      result_grouping->add_entry_groupings();
+  entry->set_namespace_("namespace1");
+  entry->set_schema("Message");
+  // Rebind `entry` to the newly added entry; otherwise the setters below
+  // would overwrite the first entry and leave the second one empty.
+  entry = result_grouping->add_entry_groupings();
+  entry->set_namespace_("namespace2");
+  entry->set_schema("Message");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  EXPECT_THAT(search_result_proto.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+  // Specify negative results. This should result in an error. The search has
+  // to be re-issued so the assertion checks the negative case rather than the
+  // stale result of the zero case.
+  result_spec.mutable_result_groupings(0)->set_max_results(-1);
+  search_result_proto = icing.Search(search_spec, scoring_spec, result_spec);
+  EXPECT_THAT(search_result_proto.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// Groups results by NAMESPACE with one group for "namespace1" (max 1 result)
+// and one combined group for "namespace2" + "namespace3" (max 2 results), then
+// checks which documents are returned.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingMultiNamespaceGrouping) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Builds a "Message" document with the given key, body and document score.
+  auto make_message = [&](const char* name_space, const char* uri,
+                          const char* body, int score) {
+    return DocumentBuilder()
+        .SetKey(name_space, uri)
+        .SetSchema("Message")
+        .AddStringProperty("body", body)
+        .SetScore(score)
+        .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+        .Build();
+  };
+
+  // Six documents, two per namespace, with strictly increasing scores:
+  // document1 < document2 < document3 < document4 < document5 < document6
+  DocumentProto document1 = make_message("namespace1", "uri/1", "message1", 1);
+  DocumentProto document2 = make_message("namespace1", "uri/2", "message2", 2);
+  DocumentProto document3 = make_message("namespace2", "uri/3", "message3", 3);
+  DocumentProto document4 = make_message("namespace2", "uri/4", "message1", 4);
+  DocumentProto document5 = make_message("namespace3", "uri/5", "message3", 5);
+  DocumentProto document6 = make_message("namespace3", "uri/6", "message1", 6);
+
+  for (const DocumentProto* doc : {&document1, &document2, &document3,
+                                   &document4, &document5, &document6}) {
+    ASSERT_THAT(icing.Put(*doc).status(), ProtoIsOk());
+  }
+
+  // The prefix query "m" matches all six documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+
+  // Group 1: at most one result from "namespace1".
+  ResultSpecProto::ResultGrouping* namespace1_group =
+      result_spec.add_result_groupings();
+  namespace1_group->set_max_results(1);
+  namespace1_group->add_entry_groupings()->set_namespace_("namespace1");
+
+  // Group 2: at most two results from "namespace2" and "namespace3" combined.
+  ResultSpecProto::ResultGrouping* combined_group =
+      result_spec.add_result_groupings();
+  combined_group->set_max_results(2);
+  combined_group->add_entry_groupings()->set_namespace_("namespace2");
+  combined_group->add_entry_groupings()->set_namespace_("namespace3");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+
+  // The last result (document1) in namespace "namespace1" should not be
+  // included. "namespace2" and "namespace3" are grouped together. So only the
+  // two highest scored documents between the two (both of which are in
+  // "namespace3") should be returned.
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  for (const DocumentProto* doc : {&document6, &document5, &document2}) {
+    *expected_search_result_proto.mutable_results()
+         ->Add()
+         ->mutable_document() = *doc;
+  }
+
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Groups results by SCHEMA_TYPE with one group for "Message" and one group
+// for "Email", each limited to a single result, and checks that the highest
+// scored document of each schema type is returned.
+TEST_P(IcingSearchEngineSearchTest, SearchResultGroupingMultiSchemaGrouping) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // Schema with three types: "Message" (body), "Person" (name), and "Email"
+  // (a nested "Person" sender plus a subject). All string properties use
+  // prefix matching with the plain tokenizer.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("body")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED)))
+          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("name")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Email")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("sender")
+                                        .SetDataTypeDocument(
+                                            "Person",
+                                            /*index_nested_properties=*/true)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("subject")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+  // document1 is the only "Email"; document2 and document3 are "Message"
+  // documents, with document3 having the higher score.
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri1")
+          .SetSchema("Email")
+          .SetScore(1)
+          .SetCreationTimestampMs(10)
+          .AddStringProperty("subject", "foo")
+          .AddDocumentProperty("sender", DocumentBuilder()
+                                             .SetKey("namespace", "uri1-sender")
+                                             .SetSchema("Person")
+                                             .AddStringProperty("name", "foo")
+                                             .Build())
+          .Build();
+  DocumentProto document2 = DocumentBuilder()
+                                .SetKey("namespace1", "uri2")
+                                .SetSchema("Message")
+                                .SetScore(2)
+                                .SetCreationTimestampMs(10)
+                                .AddStringProperty("body", "fo")
+                                .Build();
+  DocumentProto document3 = DocumentBuilder()
+                                .SetKey("namespace2", "uri3")
+                                .SetSchema("Message")
+                                .SetScore(3)
+                                .SetCreationTimestampMs(10)
+                                .AddStringProperty("body", "fo")
+                                .Build();
+
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+  // "f" will match all 3 documents
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("f");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+  ResultSpecProto::ResultGrouping* result_grouping =
+      result_spec.add_result_groupings();
+  ResultSpecProto::ResultGrouping::Entry* entry =
+      result_grouping->add_entry_groupings();
+  result_grouping->set_max_results(1);
+  entry->set_schema("Message");
+  result_grouping = result_spec.add_result_groupings();
+  result_grouping->set_max_results(1);
+  entry = result_grouping->add_entry_groupings();
+  // Grouping is by schema type, so the entry must name the schema. The
+  // previous version set the namespace field to "Email" by mistake.
+  entry->set_schema("Email");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+
+  // Each of the highest scored documents of schema type "Message" (document3)
+  // and "Email" (document1) should be returned.
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      document3;
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      document1;
+
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+// Groups results by NAMESPACE_AND_SCHEMA_TYPE with one single-result group
+// per (namespace, "Message") pair and expects the top-scored document of each
+// namespace to be returned.
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingMultiNamespaceAndSchemaGrouping) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Builds a "Message" document with the given key, body and document score.
+  auto make_message = [&](const char* name_space, const char* uri,
+                          const char* body, int score) {
+    return DocumentBuilder()
+        .SetKey(name_space, uri)
+        .SetSchema("Message")
+        .AddStringProperty("body", body)
+        .SetScore(score)
+        .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+        .Build();
+  };
+
+  // Six documents, two per namespace, with strictly increasing scores:
+  // document1 < document2 < document3 < document4 < document5 < document6
+  DocumentProto document1 = make_message("namespace1", "uri/1", "message1", 1);
+  DocumentProto document2 = make_message("namespace1", "uri/2", "message2", 2);
+  DocumentProto document3 = make_message("namespace2", "uri/3", "message3", 3);
+  DocumentProto document4 = make_message("namespace2", "uri/4", "message1", 4);
+  DocumentProto document5 = make_message("namespace3", "uri/5", "message3", 5);
+  DocumentProto document6 = make_message("namespace3", "uri/6", "message1", 6);
+
+  for (const DocumentProto* doc : {&document1, &document2, &document3,
+                                   &document4, &document5, &document6}) {
+    ASSERT_THAT(icing.Put(*doc).status(), ProtoIsOk());
+  }
+
+  // The prefix query "m" matches all six documents.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // One group per namespace, each holding a single (namespace, "Message")
+  // entry and limited to one result.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
+  for (const char* name_space : {"namespace1", "namespace2", "namespace3"}) {
+    ResultSpecProto::ResultGrouping* group = result_spec.add_result_groupings();
+    group->set_max_results(1);
+    ResultSpecProto::ResultGrouping::Entry* group_entry =
+        group->add_entry_groupings();
+    group_entry->set_namespace_(name_space);
+    group_entry->set_schema("Message");
+  }
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+
+  // The three highest scored documents that fit the criteria of
+  // "namespace1xMessage" (document2), "namespace2xMessage" (document4),
+  // and "namespace3xMessage" (document6) should be returned.
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  for (const DocumentProto* doc : {&document6, &document4, &document2}) {
+    *expected_search_result_proto.mutable_results()
+         ->Add()
+         ->mutable_document() = *doc;
+  }
+
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingNonexistentNamespaceShouldBeIgnored) {
+  // Checks that a grouping entry naming a namespace that holds no documents
+  // does not change how the real namespace in the same grouping is limited.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Two documents in "namespace1"; by document score the second one outranks
+  // the first.
+  DocumentProto lower_scored_document =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("body", "message1")
+          .Build();
+  DocumentProto higher_scored_document =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/2")
+          .SetSchema("Message")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("body", "message2")
+          .Build();
+
+  ASSERT_THAT(icing.Put(lower_scored_document).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(higher_scored_document).status(), ProtoIsOk());
+
+  // Prefix query "m" hits the body of both documents.
+  SearchSpecProto search_spec;
+  search_spec.set_search_type(GetParam());
+  search_spec.set_query("m");
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+  // Rank by the score stored on each document.
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // One grouping, capped at a single result, listing the real namespace and a
+  // namespace that was never written to.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
+  ResultSpecProto::ResultGrouping* grouping =
+      result_spec.add_result_groupings();
+  grouping->add_entry_groupings()->set_namespace_("namespace1");
+  grouping->set_max_results(1);
+  grouping->add_entry_groupings()->set_namespace_("nonexistentNamespace");
+
+  SearchResultProto actual_results =
+      icing.Search(search_spec, scoring_spec, result_spec);
+
+  // Only the top ranked document of "namespace1" (the higher scored one) is
+  // expected back; listing "nonexistentNamespace" alongside it should have no
+  // effect.
+  SearchResultProto expected_results;
+  expected_results.mutable_status()->set_code(StatusProto::OK);
+  *expected_results.mutable_results()->Add()->mutable_document() =
+      higher_scored_document;
+
+  EXPECT_THAT(actual_results,
+              EqualsSearchResultIgnoreStatsAndScores(expected_results));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingNonexistentSchemaShouldBeIgnored) {
+  // Checks that a grouping entry naming a schema type that is not part of the
+  // schema does not change how the real type in the same grouping is limited.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Two "Message" documents; by document score the second one outranks the
+  // first.
+  DocumentProto lower_scored_document =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("body", "message1")
+          .Build();
+  DocumentProto higher_scored_document =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/2")
+          .SetSchema("Message")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("body", "message2")
+          .Build();
+
+  ASSERT_THAT(icing.Put(lower_scored_document).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(higher_scored_document).status(), ProtoIsOk());
+
+  // Prefix query "m" hits the body of both documents.
+  SearchSpecProto search_spec;
+  search_spec.set_search_type(GetParam());
+  search_spec.set_query("m");
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+  // Rank by the score stored on each document.
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // One grouping, capped at a single result, listing the real schema type and
+  // a type that does not exist.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+  ResultSpecProto::ResultGrouping* grouping =
+      result_spec.add_result_groupings();
+  grouping->add_entry_groupings()->set_schema("Message");
+  grouping->set_max_results(1);
+  grouping->add_entry_groupings()->set_schema("nonexistentMessage");
+
+  SearchResultProto actual_results =
+      icing.Search(search_spec, scoring_spec, result_spec);
+
+  // Only the top ranked "Message" document (the higher scored one) is expected
+  // back; listing "nonexistentMessage" alongside it should have no effect.
+  SearchResultProto expected_results;
+  expected_results.mutable_status()->set_code(StatusProto::OK);
+  *expected_results.mutable_results()->Add()->mutable_document() =
+      higher_scored_document;
+
+  EXPECT_THAT(actual_results,
+              EqualsSearchResultIgnoreStatsAndScores(expected_results));
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+       SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) {
+  // Checks that a grouping entry whose schema type does not exist is ignored
+  // and does not affect the limit applied through the valid entry that shares
+  // its grouping.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Creates 4 documents (two per namespace) and ensures the relationship in
+  // terms of document score is: document1 < document2 < document3 < document4
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message1")
+          .SetScore(1)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace1", "uri/2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message2")
+          .SetScore(2)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  DocumentProto document3 =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri/3")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message3")
+          .SetScore(3)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  DocumentProto document4 =
+      DocumentBuilder()
+          .SetKey("namespace2", "uri/4")
+          .SetSchema("Message")
+          .AddStringProperty("body", "message4")
+          .SetScore(4)
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+
+  ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+
+  // "m" will match all 4 documents
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("m");
+  search_spec.set_search_type(GetParam());
+
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+  // NOTE(review): the test name and the expectation below talk about
+  // namespace x schema pairs, yet results are grouped by SCHEMA_TYPE.
+  // Presumably the namespace fields of the entries are not consulted in this
+  // mode - confirm whether NAMESPACE_AND_SCHEMA_TYPE was intended. Switching
+  // the mode may change which documents are returned, so it is left as is.
+  ResultSpecProto result_spec;
+  result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
+  ResultSpecProto::ResultGrouping* result_grouping =
+      result_spec.add_result_groupings();
+  ResultSpecProto::ResultGrouping::Entry* entry =
+      result_grouping->add_entry_groupings();
+  result_grouping->set_max_results(1);
+  entry->set_namespace_("namespace2");
+  entry->set_schema("Message");
+  // Second entry: an existing namespace paired with a nonexistent schema type.
+  // The namespace is written with set_namespace_(); the previous code called
+  // set_schema() twice, so "namespace1" was overwritten and never used.
+  entry = result_grouping->add_entry_groupings();
+  entry->set_namespace_("namespace1");
+  entry->set_schema("nonexistentMessage");
+
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec);
+
+  // Only the top ranked document in "namespace2xMessage" (document4), should be
+  // returned. The presence of "namespace1xnonexistentMessage" in the same
+  // result grouping should have no effect. If either the namespace or the
+  // schema type is nonexistent, the entire entry will be ignored.
+  SearchResultProto expected_search_result_proto;
+  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+      document4;
+
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetNormalization) {
+  // Exact-match query "mdi Zürich" is expected to hit both "MDI zurich ..."
+  // and "mdi Zürich ..." and to snippet each document with the text exactly
+  // as it was stored in that document.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto document_one =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "MDI zurich Team Meeting")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "mdi Zürich Team Meeting")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("mdi Zürich");
+  search_spec.set_search_type(GetParam());
+
+  // Snippet both results, up to two matches per property.
+  ResultSpecProto result_spec;
+  result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+  result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
+  result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+
+  SearchResultProto results =
+      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(2));
+
+  // First result: document_two, matched on its original (accented) text.
+  const DocumentProto& result_document_1 = results.results(0).document();
+  const SnippetProto& result_snippet_1 = results.results(0).snippet();
+  EXPECT_THAT(result_document_1, EqualsProto(document_two));
+  // ASSERT rather than EXPECT: entries(0) is read right below, so the test
+  // must stop here if the snippet has no entry.
+  ASSERT_THAT(result_snippet_1.entries(), SizeIs(1));
+  EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+  std::string_view content = GetString(
+      &result_document_1, result_snippet_1.entries(0).property_name());
+  // One window per match; both matches share the same window here.
+  EXPECT_THAT(
+      GetWindows(content, result_snippet_1.entries(0)),
+      ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+  EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+              ElementsAre("mdi", "Zürich"));
+
+  // Second result: document_one, matched on its original (unaccented) text.
+  const DocumentProto& result_document_2 = results.results(1).document();
+  const SnippetProto& result_snippet_2 = results.results(1).snippet();
+  EXPECT_THAT(result_document_2, EqualsProto(document_one));
+  ASSERT_THAT(result_snippet_2.entries(), SizeIs(1));
+  EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+  content = GetString(&result_document_2,
+                      result_snippet_2.entries(0).property_name());
+  EXPECT_THAT(
+      GetWindows(content, result_snippet_2.entries(0)),
+      ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+  EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+              ElementsAre("MDI", "zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetNormalizationPrefix) {
+  // Prefix query "md Zür" is expected to hit both "MDI zurich ..." and
+  // "mdi Zürich ..." and to snippet each document with the full matched terms
+  // exactly as they were stored in that document.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto document_one =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetSchema("Message")
+          .AddStringProperty("body", "MDI zurich Team Meeting")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetSchema("Message")
+          .AddStringProperty("body", "mdi Zürich Team Meeting")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("md Zür");
+  search_spec.set_search_type(GetParam());
+
+  // Snippet both results, up to two matches per property.
+  ResultSpecProto result_spec;
+  result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
+  result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
+  result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+
+  SearchResultProto results =
+      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(2));
+
+  // First result: document_two, matched on its original (accented) text.
+  const DocumentProto& result_document_1 = results.results(0).document();
+  const SnippetProto& result_snippet_1 = results.results(0).snippet();
+  EXPECT_THAT(result_document_1, EqualsProto(document_two));
+  // ASSERT rather than EXPECT: entries(0) is read right below, so the test
+  // must stop here if the snippet has no entry.
+  ASSERT_THAT(result_snippet_1.entries(), SizeIs(1));
+  EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+  std::string_view content = GetString(
+      &result_document_1, result_snippet_1.entries(0).property_name());
+  // One window per match; both matches share the same window here.
+  EXPECT_THAT(
+      GetWindows(content, result_snippet_1.entries(0)),
+      ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+  EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+              ElementsAre("mdi", "Zürich"));
+
+  // Second result: document_one, matched on its original (unaccented) text.
+  const DocumentProto& result_document_2 = results.results(1).document();
+  const SnippetProto& result_snippet_2 = results.results(1).snippet();
+  EXPECT_THAT(result_document_2, EqualsProto(document_one));
+  ASSERT_THAT(result_snippet_2.entries(), SizeIs(1));
+  EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+  content = GetString(&result_document_2,
+                      result_snippet_2.entries(0).property_name());
+  EXPECT_THAT(
+      GetWindows(content, result_snippet_2.entries(0)),
+      ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+  EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+              ElementsAre("MDI", "zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetSectionRestrict) {
+  // A query restricted to the "body" property is expected to produce snippet
+  // entries for "body" only, even though "subject" contains the same term.
+  // The check is repeated on the second page fetched through GetNextPage.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  DocumentProto document_one =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "MDI zurich Team Meeting")
+          .AddStringProperty("body", "MDI zurich Team Meeting")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "MDI zurich trip")
+          .AddStringProperty("body", "Let's travel to zurich")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .Build();
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  // The three specs are heap-allocated so they can be destroyed before the
+  // GetNextPage call below - presumably to show that paging does not depend on
+  // references to the caller's spec objects.
+  auto search_spec = std::make_unique<SearchSpecProto>();
+  search_spec->set_term_match_type(TermMatchType::PREFIX);
+  search_spec->set_query("body:Zür");
+  search_spec->set_search_type(GetParam());
+
+  // One result per page so that the second document arrives via GetNextPage.
+  auto result_spec = std::make_unique<ResultSpecProto>();
+  result_spec->set_num_per_page(1);
+  result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
+  result_spec->mutable_snippet_spec()->set_num_matches_per_property(10);
+  result_spec->mutable_snippet_spec()->set_num_to_snippet(10);
+
+  auto scoring_spec = std::make_unique<ScoringSpecProto>();
+  *scoring_spec = GetDefaultScoringSpec();
+
+  SearchResultProto results =
+      icing.Search(*search_spec, *scoring_spec, *result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(1));
+
+  // First page: document_two with a single snippet entry on "body".
+  const DocumentProto& result_document_two = results.results(0).document();
+  const SnippetProto& result_snippet_two = results.results(0).snippet();
+  EXPECT_THAT(result_document_two, EqualsProto(document_two));
+  // ASSERT rather than EXPECT: entries(0) is read right below, so the test
+  // must stop here if the snippet has no entry.
+  ASSERT_THAT(result_snippet_two.entries(), SizeIs(1));
+  EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+  std::string_view content = GetString(
+      &result_document_two, result_snippet_two.entries(0).property_name());
+  EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+              ElementsAre("Let's travel to zurich"));
+  EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+              ElementsAre("zurich"));
+
+  // Destroy the specs before asking for the next page.
+  search_spec.reset();
+  scoring_spec.reset();
+  result_spec.reset();
+
+  // Note: this assignment replaces the proto that result_document_two,
+  // result_snippet_two and content refer to; none of them is read afterwards
+  // (content is re-assigned before its next use).
+  results = icing.GetNextPage(results.next_page_token());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(1));
+
+  // Second page: document_one, again with a single snippet entry on "body".
+  const DocumentProto& result_document_one = results.results(0).document();
+  const SnippetProto& result_snippet_one = results.results(0).snippet();
+  EXPECT_THAT(result_document_one, EqualsProto(document_one));
+  ASSERT_THAT(result_snippet_one.entries(), SizeIs(1));
+  EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+  content = GetString(&result_document_one,
+                      result_snippet_one.entries(0).property_name());
+  EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+              ElementsAre("MDI zurich Team Meeting"));
+  EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+              ElementsAre("zurich"));
+}
+
+TEST_P(IcingSearchEngineSearchTest, Hyphens) {
+  // Records the current behavior of a property-restricted query containing a
+  // hyphen: "foo:bar-baz" is expected to return both indexed documents.
+  // TODO(b/208654892): Fix issues with minus/hyphen chars.
+  const bool uses_advanced_query =
+      GetParam() ==
+      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY;
+  if (uses_advanced_query) {
+    GTEST_SKIP()
+        << "Advanced query doesn't properly support hyphens at this time.";
+  }
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // Schema: a single type "MyType" with one required, exact-match string
+  // property "foo" tokenized with the plain tokenizer.
+  SchemaProto schema;
+  SchemaTypeConfigProto* my_type = schema.add_types();
+  my_type->set_schema_type("MyType");
+  PropertyConfigProto* foo_property = my_type->add_properties();
+  foo_property->set_property_name("foo");
+  foo_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+  foo_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  auto* foo_indexing = foo_property->mutable_string_indexing_config();
+  foo_indexing->set_tokenizer_type(StringIndexingConfig::TokenizerType::PLAIN);
+  foo_indexing->set_term_match_type(TermMatchType::EXACT_ONLY);
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+  // The first document contains the hyphenated token "bar-baz".
+  DocumentProto hyphenated_document =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetSchema("MyType")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("foo", "foo bar-baz bat")
+          .Build();
+  ASSERT_THAT(icing.Put(hyphenated_document).status(), ProtoIsOk());
+
+  // The second document contains "bar" and "baz" as separate words.
+  DocumentProto separated_document =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetSchema("MyType")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("foo", "bar for baz bat-man")
+          .Build();
+  ASSERT_THAT(icing.Put(separated_document).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_search_type(GetParam());
+  search_spec.set_query("foo:bar-baz");
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+  ResultSpecProto default_result_spec;
+  SearchResultProto results =
+      icing.Search(search_spec, GetDefaultScoringSpec(), default_result_spec);
+
+  // Both documents come back, the later-added one first.
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(2));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(separated_document));
+  EXPECT_THAT(results.results(1).document(), EqualsProto(hyphenated_document));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionEmptyFieldPath) {
+  // A type property mask for "Email" whose only path is the empty string is
+  // expected to strip every property from the returned Email documents, on the
+  // first page as well as on pages fetched through GetNextPage.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+              ProtoIsOk());
+
+  // 1. Add two email documents
+  DocumentProto document_one =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender",
+              DocumentBuilder()
+                  .SetKey("namespace", "uri1")
+                  .SetSchema("Person")
+                  .AddStringProperty("name", "Meg Ryan")
+                  .AddStringProperty("emailAddress", "shopgirl@aol.com")
+                  .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .AddStringProperty(
+              "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+          .Build();
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender", DocumentBuilder()
+                            .SetKey("namespace", "uri2")
+                            .SetSchema("Person")
+                            .AddStringProperty("name", "Tom Hanks")
+                            .AddStringProperty("emailAddress", "ny152@aol.com")
+                            .Build())
+          .AddStringProperty("subject", "Goodnight Moon!")
+          .AddStringProperty("body",
+                             "Count all the sheep and tell them 'Hello'.")
+          .Build();
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  // 2. Issue a query that will match those documents and use an empty field
+  // mask to request NO properties.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("hello");
+  search_spec.set_search_type(GetParam());
+
+  ResultSpecProto result_spec;
+  // Retrieve only one result at a time to make sure that projection works when
+  // retrieving all pages.
+  result_spec.set_num_per_page(1);
+  TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
+  email_field_mask->set_schema_type("Email");
+  email_field_mask->add_paths("");
+
+  SearchResultProto results =
+      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  // ASSERT rather than EXPECT: results(0) is read below, so the test must stop
+  // here if the page is empty.
+  ASSERT_THAT(results.results(), SizeIs(1));
+
+  // 3. Verify that the returned results contain no properties.
+  DocumentProto projected_document_two = DocumentBuilder()
+                                             .SetKey("namespace", "uri2")
+                                             .SetCreationTimestampMs(1000)
+                                             .SetSchema("Email")
+                                             .Build();
+  EXPECT_THAT(results.results(0).document(),
+              EqualsProto(projected_document_two));
+
+  // 4. The second page must be projected the same way.
+  results = icing.GetNextPage(results.next_page_token());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(1));
+  DocumentProto projected_document_one = DocumentBuilder()
+                                             .SetKey("namespace", "uri1")
+                                             .SetCreationTimestampMs(1000)
+                                             .SetSchema("Email")
+                                             .Build();
+  EXPECT_THAT(results.results(0).document(),
+              EqualsProto(projected_document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) {
+  // A type property mask for "Email" listing 'sender.name' and 'subject' is
+  // expected to return exactly those two properties, on the first page as well
+  // as on a page fetched through GetNextPage after the specs were destroyed.
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+              ProtoIsOk());
+
+  // 1. Add two email documents
+  DocumentProto document_one =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender",
+              DocumentBuilder()
+                  .SetKey("namespace", "uri1")
+                  .SetSchema("Person")
+                  .AddStringProperty("name", "Meg Ryan")
+                  .AddStringProperty("emailAddress", "shopgirl@aol.com")
+                  .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .AddStringProperty(
+              "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+          .Build();
+  ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+  DocumentProto document_two =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender", DocumentBuilder()
+                            .SetKey("namespace", "uri2")
+                            .SetSchema("Person")
+                            .AddStringProperty("name", "Tom Hanks")
+                            .AddStringProperty("emailAddress", "ny152@aol.com")
+                            .Build())
+          .AddStringProperty("subject", "Goodnight Moon!")
+          .AddStringProperty("body",
+                             "Count all the sheep and tell them 'Hello'.")
+          .Build();
+  ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+  // 2. Issue a query that will match those documents and request only
+  // 'sender.name' and 'subject' properties.
+  // Create all of search_spec, result_spec and scoring_spec as objects with
+  // scope that will end before the call to GetNextPage to ensure that the
+  // implementation isn't relying on references to any of them.
+  auto search_spec = std::make_unique<SearchSpecProto>();
+  search_spec->set_term_match_type(TermMatchType::PREFIX);
+  search_spec->set_query("hello");
+  search_spec->set_search_type(GetParam());
+
+  auto result_spec = std::make_unique<ResultSpecProto>();
+  // Retrieve only one result at a time to make sure that projection works when
+  // retrieving all pages.
+  result_spec->set_num_per_page(1);
+  TypePropertyMask* email_field_mask = result_spec->add_type_property_masks();
+  email_field_mask->set_schema_type("Email");
+  email_field_mask->add_paths("sender.name");
+  email_field_mask->add_paths("subject");
+
+  auto scoring_spec = std::make_unique<ScoringSpecProto>();
+  *scoring_spec = GetDefaultScoringSpec();
+  SearchResultProto results =
+      icing.Search(*search_spec, *scoring_spec, *result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  // ASSERT rather than EXPECT: results(0) is read below, so the test must stop
+  // here if the page is empty.
+  ASSERT_THAT(results.results(), SizeIs(1));
+
+  // 3. Verify that the first returned result only contains the 'sender.name'
+  // and 'subject' properties.
+  DocumentProto projected_document_two =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty("sender",
+                               DocumentBuilder()
+                                   .SetKey("namespace", "uri2")
+                                   .SetSchema("Person")
+                                   .AddStringProperty("name", "Tom Hanks")
+                                   .Build())
+          .AddStringProperty("subject", "Goodnight Moon!")
+          .Build();
+  EXPECT_THAT(results.results(0).document(),
+              EqualsProto(projected_document_two));
+
+  // 4. Now, delete all of the specs used in the search. GetNextPage should have
+  // no problem because it shouldn't be keeping any references to them.
+  search_spec.reset();
+  result_spec.reset();
+  scoring_spec.reset();
+
+  // 5. Verify that the second returned result only contains the 'sender.name'
+  // and 'subject' properties.
+  results = icing.GetNextPage(results.next_page_token());
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  ASSERT_THAT(results.results(), SizeIs(1));
+  DocumentProto projected_document_one =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty("sender",
+                               DocumentBuilder()
+                                   .SetKey("namespace", "uri1")
+                                   .SetSchema("Person")
+                                   .AddStringProperty("name", "Meg Ryan")
+                                   .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .Build();
+  EXPECT_THAT(results.results(0).document(),
+              EqualsProto(projected_document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
+  // Pages through five matching documents, two per page, and compares the
+  // QueryStatsProto attached to every page with the expected values. The
+  // engine runs on a fake clock whose timers report 5 ms.
+  auto clock = std::make_unique<FakeClock>();
+  clock->SetTimerElapsedMilliseconds(5);
+  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+                              std::make_unique<Filesystem>(),
+                              std::make_unique<IcingFilesystem>(),
+                              std::move(clock), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  // Index five message documents, uri1 through uri5, in that order.
+  DocumentProto message1 = CreateMessageDocument("namespace", "uri1");
+  ASSERT_THAT(icing.Put(message1).status(), ProtoIsOk());
+  DocumentProto message2 = CreateMessageDocument("namespace", "uri2");
+  ASSERT_THAT(icing.Put(message2).status(), ProtoIsOk());
+  DocumentProto message3 = CreateMessageDocument("namespace", "uri3");
+  ASSERT_THAT(icing.Put(message3).status(), ProtoIsOk());
+  DocumentProto message4 = CreateMessageDocument("namespace", "uri4");
+  ASSERT_THAT(icing.Put(message4).status(), ProtoIsOk());
+  DocumentProto message5 = CreateMessageDocument("namespace", "uri5");
+  ASSERT_THAT(icing.Put(message5).status(), ProtoIsOk());
+
+  // One query term, one namespace filter and one schema type filter.
+  SearchSpecProto search_spec;
+  search_spec.set_search_type(GetParam());
+  search_spec.set_query("message");
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.add_namespace_filters("namespace");
+  search_spec.add_schema_type_filters(message1.schema());
+
+  // Two results per page; only the first three results overall get a snippet.
+  ResultSpecProto result_spec;
+  result_spec.set_num_per_page(2);
+  ResultSpecProto::SnippetSpecProto* snippet_spec =
+      result_spec.mutable_snippet_spec();
+  snippet_spec->set_num_to_snippet(3);
+  snippet_spec->set_num_matches_per_property(1);
+  snippet_spec->set_max_window_utf32_length(64);
+
+  ScoringSpecProto scoring_spec;
+  scoring_spec.set_rank_by(
+      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+  // First page: 2 results, both with snippets.
+  SearchResultProto page = icing.Search(search_spec, scoring_spec, result_spec);
+  ASSERT_THAT(page.status(), ProtoIsOk());
+  ASSERT_THAT(page.results(), SizeIs(2));
+  ASSERT_THAT(page.next_page_token(), Ne(kInvalidNextPageToken));
+
+  // The first page carries the query-level stats in addition to the per-page
+  // ones.
+  QueryStatsProto first_page_stats;
+  first_page_stats.set_query_length(7);
+  first_page_stats.set_num_terms(1);
+  first_page_stats.set_num_namespaces_filtered(1);
+  first_page_stats.set_num_schema_types_filtered(1);
+  first_page_stats.set_ranking_strategy(
+      ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+  first_page_stats.set_is_first_page(true);
+  first_page_stats.set_requested_page_size(2);
+  first_page_stats.set_num_results_returned_current_page(2);
+  first_page_stats.set_num_documents_scored(5);
+  first_page_stats.set_num_results_with_snippets(2);
+  first_page_stats.set_latency_ms(5);
+  first_page_stats.set_parse_query_latency_ms(5);
+  first_page_stats.set_scoring_latency_ms(5);
+  first_page_stats.set_ranking_latency_ms(5);
+  first_page_stats.set_document_retrieval_latency_ms(5);
+  first_page_stats.set_lock_acquisition_latency_ms(5);
+  EXPECT_THAT(page.query_stats(), EqualsProto(first_page_stats));
+
+  // Second page: 2 results, 1 of them with a snippet.
+  page = icing.GetNextPage(page.next_page_token());
+  ASSERT_THAT(page.status(), ProtoIsOk());
+  ASSERT_THAT(page.results(), SizeIs(2));
+  ASSERT_THAT(page.next_page_token(), Gt(kInvalidNextPageToken));
+
+  QueryStatsProto second_page_stats;
+  second_page_stats.set_is_first_page(false);
+  second_page_stats.set_requested_page_size(2);
+  second_page_stats.set_num_results_returned_current_page(2);
+  second_page_stats.set_num_results_with_snippets(1);
+  second_page_stats.set_latency_ms(5);
+  second_page_stats.set_document_retrieval_latency_ms(5);
+  second_page_stats.set_lock_acquisition_latency_ms(5);
+  EXPECT_THAT(page.query_stats(), EqualsProto(second_page_stats));
+
+  // Third and last page: 1 result, no snippets, no further page token.
+  page = icing.GetNextPage(page.next_page_token());
+  ASSERT_THAT(page.status(), ProtoIsOk());
+  ASSERT_THAT(page.results(), SizeIs(1));
+  ASSERT_THAT(page.next_page_token(), Eq(kInvalidNextPageToken));
+
+  QueryStatsProto third_page_stats;
+  third_page_stats.set_is_first_page(false);
+  third_page_stats.set_requested_page_size(2);
+  third_page_stats.set_num_results_returned_current_page(1);
+  third_page_stats.set_num_results_with_snippets(0);
+  third_page_stats.set_latency_ms(5);
+  third_page_stats.set_document_retrieval_latency_ms(5);
+  third_page_stats.set_lock_acquisition_latency_ms(5);
+  EXPECT_THAT(page.query_stats(), EqualsProto(third_page_stats));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SnippetErrorTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // Three documents, four "subject" values each; every value contains "like".
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetScore(10)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cats", "I like dogs",
+ "I like birds", "I like fish")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetScore(20)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like red", "I like green",
+ "I like blue", "I like yellow")
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetScore(5)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cupcakes", "I like donuts",
+ "I like eclairs", "I like froyo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ // Rank by document score (uri2 > uri1 > uri3); snippet only the top two.
+ SearchSpecProto search_spec;
+ search_spec.add_schema_type_filters("Generic");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("like");
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(3);
+ result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4);
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ // All three match; 4 values contain "like" but only 3 entries are expected.
+ ASSERT_THAT(search_results.results(), SizeIs(3));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri2"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ std::string_view content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ EXPECT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ EXPECT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(1);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ ASSERT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ ASSERT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+ // uri3 ranks third, beyond num_to_snippet(2): no snippet entries expected.
+ result = &search_results.results(2);
+ ASSERT_THAT(result->document().uri(), Eq("uri3"));
+ ASSERT_THAT(result->snippet().entries(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, CJKSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kChinese)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("走");  // A prefix of the segment "走路".
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "body" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kChinese));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ // "走路" starts at UTF-8 byte 9 and spans 6 bytes (see the index table above).
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("走路"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, InvalidToEmptyQueryTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "Luca Brasi sleeps with the 🐟🐟🐟."
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF8 idx: 0 5 11 18 23 27 3135 39
+ // UTF16 idx: 0 5 11 18 23 27 2931 33
+ // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
+ // and "🐟".
+ constexpr std::string_view kSicilianMessage =
+ "Luca Brasi sleeps with the 🐟🐟🐟.";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kSicilianMessage)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "Some other content.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // Search with default scoring and result specs; no snippets are requested.
+ SearchSpecProto search_spec;
+ search_spec.set_query("?");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+ ScoringSpecProto scoring_spec;
+ ResultSpecProto result_spec;
+
+ // "?": OK status; the raw query returns both docs, the advanced query none.
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.results(), IsEmpty());
+ } else {
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+ // "。" is expected to behave exactly like "?".
+ search_spec.set_query("。");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.results(), IsEmpty());
+ } else {
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+ // "-": INVALID_ARGUMENT for the advanced query; raw query returns both docs.
+ search_spec.set_query("-");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+ // ":" is expected to behave exactly like "-".
+ search_spec.set_query(":");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ // This is the actual correct behavior.
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+ // A lone "OR" is expected to behave exactly like "-" and ":".
+ search_spec.set_query("OR");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ EXPECT_THAT(search_results.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ } else {
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+ }
+ // A whitespace-only query returns both docs for either search type.
+ search_spec.set_query(" ");
+ search_results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_results.status(), ProtoIsOk());
+ EXPECT_THAT(search_results.results(), SizeIs(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, EmojiSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "Luca Brasi sleeps with the 🐟🐟🐟."
+ // ^ ^ ^ ^ ^ ^ ^ ^ ^
+ // UTF8 idx: 0 5 11 18 23 27 3135 39
+ // UTF16 idx: 0 5 11 18 23 27 2931 33
+ // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
+ // and "🐟".
+ constexpr std::string_view kSicilianMessage =
+ "Luca Brasi sleeps with the 🐟🐟🐟.";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kSicilianMessage)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "Some other content.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("🐟");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+ // Three "🐟" segments exist, but only one match per property is requested.
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "body" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kSicilianMessage));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ // The first "🐟" starts at UTF-8 byte 27 and is 4 bytes long (table above).
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("🐟"));
+
+ // Ensure that the utf-16 values are also as expected (indices 27 to 29).
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("firstName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("lastName")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("personQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // Three Person documents (the join parents) with document scores 1, 2, 3.
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person1")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first1")
+ .AddStringProperty("lastName", "last1")
+ .AddStringProperty("emailAddress", "email1@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("pkg$db/namespace", "person2")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first2")
+ .AddStringProperty("lastName", "last2")
+ .AddStringProperty("emailAddress", "email2@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto person3 =
+ DocumentBuilder()
+ .SetKey(R"(pkg$db/name#space\\)", "person3")
+ .SetSchema("Person")
+ .AddStringProperty("firstName", "first3")
+ .AddStringProperty("lastName", "last3")
+ .AddStringProperty("emailAddress", "email3@gmail.com")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ // Three Email documents (the join children), one per person; scores 3, 2, 1.
+ DocumentProto email1 =
+ DocumentBuilder()
+ .SetKey("namespace", "email1")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 1")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(3)
+ .Build();
+ DocumentProto email2 =
+ DocumentBuilder()
+ .SetKey("namespace", "email2")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 2")
+ .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(2)
+ .Build();
+ DocumentProto email3 =
+ DocumentBuilder()
+ .SetKey("namespace", "email3")
+ .SetSchema("Email")
+ .AddStringProperty("subject", "test subject 3")
+ .AddStringProperty("personQualifiedId",
+ R"(pkg$db/name\#space\\\\#person3)") // escaped
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .SetScore(1)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+
+ // Parent SearchSpec
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("firstName:first");
+ search_spec.set_search_type(GetParam());
+
+ // JoinSpec
+ JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+ join_spec->set_max_joined_child_count(100);
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("personQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::MAX);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+ nested_search_spec->set_query("subject:test");
+ nested_search_spec->set_search_type(GetParam());
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ // Parent ScoringSpec
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+ // Parent ResultSpec
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(1);  // One parent per page => three pages.
+
+ // Since we:
+ // - Use MAX for aggregation scoring strategy.
+ // - (Default) use DOCUMENT_SCORE to score child documents.
+ // - (Default) use DESC as the ranking order.
+ //
+ // person1 + email1 should have the highest aggregated score (3) and be
+ // returned first. person2 + email2 (aggregated score = 2) should be the
+ // second, and person3 + email3 (aggregated score = 1) should be the last.
+ SearchResultProto expected_result1;
+ expected_result1.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto1 =
+ expected_result1.mutable_results()->Add();
+ *result_proto1->mutable_document() = person1;
+ *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
+
+ SearchResultProto expected_result2;
+ expected_result2.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto2 =
+ expected_result2.mutable_results()->Add();
+ *result_proto2->mutable_document() = person2;
+ *result_proto2->mutable_joined_results()->Add()->mutable_document() = email2;
+
+ SearchResultProto expected_result3;
+ expected_result3.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto3 =
+ expected_result3.mutable_results()->Add();
+ *result_proto3->mutable_document() = person3;
+ *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3;
+ // Page through the results: one person with its joined email per page.
+ SearchResultProto result1 =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ uint64_t next_page_token = result1.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result1.set_next_page_token(next_page_token);
+ EXPECT_THAT(result1,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result1));
+
+ SearchResultProto result2 = icing.GetNextPage(next_page_token);
+ next_page_token = result2.next_page_token();
+ EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+ expected_result2.set_next_page_token(next_page_token);
+ EXPECT_THAT(result2,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result2));
+
+ SearchResultProto result3 = icing.GetNextPage(next_page_token);
+ next_page_token = result3.next_page_token();
+ EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+ EXPECT_THAT(result3,
+ EqualsSearchResultIgnoreStatsAndScores(expected_result3));
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterAdvancedQuerySucceeds) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // Documents one and two only set "price"; document three only sets "cost".
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+ // The search type is set explicitly here instead of coming from GetParam().
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+
+ search_spec.set_query("price == 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ // document_three has cost == 2, so the strict comparison matches nothing.
+ search_spec.set_query("cost > 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.results(), IsEmpty());
+
+ search_spec.set_query("cost >= 2");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_three));
+ // Both priced documents match: document_two first, then document_one.
+ search_spec.set_query("price <= 25");
+ results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_F(IcingSearchEngineSearchTest, NumericFilterOldQueryFails) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("transaction")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("price")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("cost")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 10)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("price", 25)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ DocumentProto document_three = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("transaction")
+ .SetCreationTimestampMs(1)
+ .AddInt64Property("cost", 2)
+ .Build();
+ ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
+ // A numeric comparison issued as a raw query, with the feature enabled.
+ SearchSpecProto search_spec;
+ search_spec.set_query("price < 20");
+ search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+ // The raw query type is expected to reject it with INVALID_ARGUMENT.
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_P(IcingSearchEngineSearchTest, BarisNormalizationTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ // "Barış" has a dotless "ı" and an "ş"; "ıbar" starts with a dotless "ı".
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "Barış")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ DocumentProto document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1)
+ .AddStringProperty("name", "ıbar")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ScoringSpecProto scoring_spec;
+ ResultSpecProto result_spec;
+ // The first three queries should each return only the "Barış" document.
+ SearchResultProto exp_results;
+ exp_results.mutable_status()->set_code(StatusProto::OK);
+ *exp_results.add_results()->mutable_document() = document;
+
+ search_spec.set_query("barış");
+ SearchResultProto results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+ // Same term with a plain "s" in place of "ş".
+ search_spec.set_query("barıs");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+ // Same term with plain "i" and "s" in place of "ı" and "ş".
+ search_spec.set_query("baris");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results));
+ // A lone dotless "ı" (prefix match) should return only the "ıbar" document.
+ SearchResultProto exp_results2;
+ exp_results2.mutable_status()->set_code(StatusProto::OK);
+ *exp_results2.add_results()->mutable_document() = document_two;
+ search_spec.set_query("ı");
+ results = icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results2));
+}
+
+TEST_P(IcingSearchEngineSearchTest, LatinSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ // The query below is plain ASCII "foo"; the text only has the accented form.
+ constexpr std::string_view kLatin = "test ḞÖÖḸĬŞĤ test";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kLatin)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TERM_MATCH_PREFIX);
+ search_spec.set_search_type(GetParam());
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+
+ // Check that the submatch, taken from the start of "ḞÖÖḸĬŞĤ", is "ḞÖÖ".
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+ std::string_view match =
+ kLatin.substr(match_proto.exact_match_byte_position(),
+ match_proto.submatch_byte_length());
+ ASSERT_THAT(match, Eq("ḞÖÖ"));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineSearchTest, IcingSearchEngineSearchTest,
+ testing::Values(  // Each TEST_P in the suite runs once per search type.
+ SearchSpecProto::SearchType::ICING_RAW_QUERY,
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY));
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_suggest_test.cc b/icing/icing-search-engine_suggest_test.cc
new file mode 100644
index 0000000..dbd0a11
--- /dev/null
+++ b/icing/icing-search-engine_suggest_test.cc
@@ -0,0 +1,1304 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/reset.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/proto/usage.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAre;
+
+ // Test-only subclass of IcingSearchEngine that lets a test inject its own
+ // Filesystem, IcingFilesystem, Clock and JniCache (e.g. mocks or fakes).
+ // Every argument is forwarded unchanged to the base-class constructor.
+ class TestIcingSearchEngine : public IcingSearchEngine {
+  public:
+   TestIcingSearchEngine(
+       const IcingSearchEngineOptions& options,
+       std::unique_ptr<const Filesystem> filesystem,
+       std::unique_ptr<const IcingFilesystem> icing_filesystem,
+       std::unique_ptr<Clock> clock, std::unique_ptr<JniCache> jni_cache)
+       : IcingSearchEngine(options, std::move(filesystem),
+                           std::move(icing_filesystem), std::move(clock),
+                           std::move(jni_cache)) {}
+ };
+
+ // Returns the directory used as the Icing base dir by the tests in this file:
+ // the "icing" subdirectory of the test temp dir.
+ std::string GetTestBaseDir() {
+   std::string base_dir = GetTestTempDir();
+   base_dir += "/icing";
+   return base_dir;
+ }
+
+ // Fixture for the suggestion tests in this file, which exercise
+ // IcingSearchEngine::SearchSuggestions. It creates the test base directory
+ // before each test and deletes it recursively afterwards.
+ class IcingSearchEngineSuggestTest : public testing::Test {
+  protected:
+   void SetUp() override {
+     // The ICU data file is set up only when neither CFString nor reverse-JNI
+     // tokenization is in use. Technically reverse-JNI segmentation could be
+     // combined with an ICU data file, but that seems unlikely and the current
+     // BUILD setup doesn't do this.
+     const bool needs_icu_data =
+         !(IsCfStringTokenization() || IsReverseJniTokenization());
+     if (needs_icu_data) {
+       // File generated via icu_data_file rule in //icing/BUILD.
+       const std::string icu_data_file_path = GetTestFilePath("icing/icu.dat");
+       ICING_ASSERT_OK(
+           icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+     }
+     filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+   }
+
+   void TearDown() override {
+     filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+   }
+
+   // Non-owning access to the fixture's filesystem.
+   const Filesystem* filesystem() const { return &filesystem_; }
+
+  private:
+   Filesystem filesystem_;
+ };
+
+ // Returns engine options that differ from the proto defaults only in the
+ // base directory, which points at GetTestBaseDir().
+ IcingSearchEngineOptions GetDefaultIcingOptions() {
+   IcingSearchEngineOptions options;
+   options.set_base_dir(GetTestBaseDir());
+   return options;
+ }
+
+ // Builds the schema shared by most tests in this file:
+ //   - "Person": optional prefix-indexed string properties "name" and
+ //     "emailAddress".
+ //   - "Email": optional prefix-indexed string properties "body" and
+ //     "subject", plus an optional "sender" document property of type
+ //     "Person" whose nested properties are indexed.
+ SchemaProto CreatePersonAndEmailSchema() {
+   SchemaTypeConfigBuilder person_type =
+       SchemaTypeConfigBuilder()
+           .SetType("Person")
+           .AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("name")
+                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                   .SetCardinality(CARDINALITY_OPTIONAL))
+           .AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("emailAddress")
+                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                   .SetCardinality(CARDINALITY_OPTIONAL));
+
+   SchemaTypeConfigBuilder email_type =
+       SchemaTypeConfigBuilder()
+           .SetType("Email")
+           .AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("body")
+                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                   .SetCardinality(CARDINALITY_OPTIONAL))
+           .AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("subject")
+                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                   .SetCardinality(CARDINALITY_OPTIONAL))
+           .AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("sender")
+                   .SetDataTypeDocument("Person",
+                                        /*index_nested_properties=*/true)
+                   .SetCardinality(CARDINALITY_OPTIONAL));
+
+   // Type order is kept as Person first, then Email.
+   return SchemaBuilder().AddType(person_type).AddType(email_type).Build();
+ }
+
+ // Verifies that suggestions for a prefix come back ordered under the
+ // DOCUMENT_COUNT ranking strategy and that num_to_return caps how many are
+ // returned.
+ TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // Creates and inserts 6 documents so that termSix appears in 6 of them,
+   // termFive in 5, termFour in 4, termThree in 3, termTwo in 2 and termOne
+   // in 1.
+   DocumentProto document1 =
+       DocumentBuilder()
+           .SetKey("namespace", "uri1")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty(
+               "subject", "termOne termTwo termThree termFour termFive termSix")
+           .Build();
+   DocumentProto document2 =
+       DocumentBuilder()
+           .SetKey("namespace", "uri2")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty("subject",
+                              "termTwo termThree termFour termFive termSix")
+           .Build();
+   DocumentProto document3 =
+       DocumentBuilder()
+           .SetKey("namespace", "uri3")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty("subject", "termThree termFour termFive termSix")
+           .Build();
+   DocumentProto document4 =
+       DocumentBuilder()
+           .SetKey("namespace", "uri4")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty("subject", "termFour termFive termSix")
+           .Build();
+   DocumentProto document5 =
+       DocumentBuilder()
+           .SetKey("namespace", "uri5")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty("subject", "termFive termSix")
+           .Build();
+   DocumentProto document6 = DocumentBuilder()
+                                 .SetKey("namespace", "uri6")
+                                 .SetSchema("Email")
+                                 .SetCreationTimestampMs(10)
+                                 .AddStringProperty("subject", "termSix")
+                                 .Build();
+   ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("t");
+   suggestion_spec.set_num_to_return(10);
+   suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+       TermMatchType::PREFIX);
+   suggestion_spec.mutable_scoring_spec()->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Query all suggestions, and they will be ranked.
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   // Check the count before indexing so that a short result list shows up as
+   // a readable assertion failure rather than an out-of-range access.
+   ASSERT_THAT(response.suggestions_size(), Eq(6));
+   ASSERT_THAT(response.suggestions().at(0).query(), Eq("termsix"));
+   ASSERT_THAT(response.suggestions().at(1).query(), Eq("termfive"));
+   ASSERT_THAT(response.suggestions().at(2).query(), Eq("termfour"));
+   ASSERT_THAT(response.suggestions().at(3).query(), Eq("termthree"));
+   ASSERT_THAT(response.suggestions().at(4).query(), Eq("termtwo"));
+   ASSERT_THAT(response.suggestions().at(5).query(), Eq("termone"));
+
+   // Query first three suggestions, and they will be ranked.
+   suggestion_spec.set_num_to_return(3);
+   response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   // num_to_return must actually cap the result at three entries.
+   ASSERT_THAT(response.suggestions_size(), Eq(3));
+   ASSERT_THAT(response.suggestions().at(0).query(), Eq("termsix"));
+   ASSERT_THAT(response.suggestions().at(1).query(), Eq("termfive"));
+   ASSERT_THAT(response.suggestions().at(2).query(), Eq("termfour"));
+ }
+
+ // With a single namespace filter, the suggestions are the terms of the
+ // document stored under that namespace.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_ShouldReturnInOneNamespace) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // namespace1 holds "foo" and "fool"; namespace2 holds only "fool".
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "foo fool")
+                                        .Build();
+   DocumentProto namespace2_email = DocumentBuilder()
+                                        .SetKey("namespace2", "uri2")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_email).status(), ProtoIsOk());
+
+   // Restrict the suggestion search to namespace1.
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.add_namespace_filters("namespace1");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+
+   // namespace1 yields two suggestions.
+   SuggestionResponse::Suggestion expected_foo;
+   expected_foo.set_query("foo");
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_foo),
+                                    EqualsProto(expected_fool)));
+ }
+
+ // With several namespace filters, the suggestions come from the documents in
+ // the listed namespaces and not from the unlisted one.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_ShouldReturnInMultipleNamespace) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // One document per namespace: "fo", "foo" and "fool" respectively.
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fo")
+                                        .Build();
+   DocumentProto namespace2_email = DocumentBuilder()
+                                        .SetKey("namespace2", "uri2")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "foo")
+                                        .Build();
+   DocumentProto namespace3_email = DocumentBuilder()
+                                        .SetKey("namespace3", "uri3")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace3_email).status(), ProtoIsOk());
+
+   // Search namespace2 and namespace3 only.
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.add_namespace_filters("namespace2");
+   suggestion_spec.add_namespace_filters("namespace3");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+
+   // namespace2 and namespace3 together yield two suggestions.
+   SuggestionResponse::Suggestion expected_foo;
+   expected_foo.set_query("foo");
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_foo),
+                                    EqualsProto(expected_fool)));
+ }
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_NamespaceNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fo")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Search for non-exist namespace3
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace3");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status().code(), Eq(StatusProto::OK));
+}
+
+ // Documents outside the namespace filter must not influence the ranking of
+ // the suggestions returned for the filtered namespace.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // Index 4 documents:
+   //   namespace1 has 2 hits for "termone".
+   //   namespace2 has 2 hits for "termtwo" and 1 hit for "termone".
+   DocumentProto namespace1_first = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "termone")
+                                        .Build();
+   DocumentProto namespace1_second = DocumentBuilder()
+                                         .SetKey("namespace1", "uri2")
+                                         .SetSchema("Email")
+                                         .SetCreationTimestampMs(10)
+                                         .AddStringProperty("subject", "termone")
+                                         .Build();
+   DocumentProto namespace2_first =
+       DocumentBuilder()
+           .SetKey("namespace2", "uri2")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty("subject", "termone termtwo")
+           .Build();
+   DocumentProto namespace2_second = DocumentBuilder()
+                                         .SetKey("namespace2", "uri3")
+                                         .SetSchema("Email")
+                                         .SetCreationTimestampMs(10)
+                                         .AddStringProperty("subject", "termtwo")
+                                         .Build();
+   ASSERT_THAT(icing.Put(namespace1_first).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace1_second).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_first).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_second).status(), ProtoIsOk());
+
+   // Only search suggestions for namespace2.
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("t");
+   suggestion_spec.add_namespace_filters("namespace2");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+
+   // The correct order is {"termtwo", "termone"}. If namespace1 were not
+   // filtered out when calculating the score, it would be
+   // {"termone", "termtwo"}.
+   SuggestionResponse::Suggestion expected_term_one;
+   expected_term_one.set_query("termone");
+   SuggestionResponse::Suggestion expected_term_two;
+   expected_term_two.set_query("termtwo");
+   ASSERT_THAT(response.suggestions(),
+               ElementsAre(EqualsProto(expected_term_two),
+                           EqualsProto(expected_term_one)));
+ }
+
+ // After a document is deleted, its terms are no longer suggested for its
+ // namespace, while the same term is still suggested from another namespace.
+ TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_DeletionTest) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // Both namespaces hold one document containing "fool".
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   DocumentProto namespace2_email = DocumentBuilder()
+                                        .SetKey("namespace2", "uri2")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_email).status(), ProtoIsOk());
+
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+
+   // namespace1 has this suggestion.
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.add_namespace_filters("namespace1");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_fool)));
+
+   // namespace2 has this suggestion too.
+   suggestion_spec.clear_namespace_filters();
+   suggestion_spec.add_namespace_filters("namespace2");
+   response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_fool)));
+
+   // Delete the document from namespace1.
+   EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk());
+
+   // Now namespace1 returns nothing.
+   suggestion_spec.clear_namespace_filters();
+   suggestion_spec.add_namespace_filters("namespace1");
+   response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(), IsEmpty());
+
+   // Add namespace2 to the existing namespace1 filter. The suggestion is
+   // still found there, which shows that namespace1 came back empty because
+   // its document was filtered out, not because the term no longer exists.
+   suggestion_spec.add_namespace_filters("namespace2");
+   response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_fool)));
+ }
+
+ // A document uri filter naming a single document limits the suggestions to
+ // the terms of that document.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_ShouldReturnInOneDocument) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // Two documents in namespace1: uri1 holds "fool", uri2 holds "foo".
+   DocumentProto fool_email = DocumentBuilder()
+                                  .SetKey("namespace1", "uri1")
+                                  .SetSchema("Email")
+                                  .SetCreationTimestampMs(10)
+                                  .AddStringProperty("subject", "fool")
+                                  .Build();
+   DocumentProto foo_email = DocumentBuilder()
+                                 .SetKey("namespace1", "uri2")
+                                 .SetSchema("Email")
+                                 .SetCreationTimestampMs(10)
+                                 .AddStringProperty("subject", "foo")
+                                 .Build();
+   ASSERT_THAT(icing.Put(fool_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(foo_email).status(), ProtoIsOk());
+
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+   SuggestionResponse::Suggestion expected_foo;
+   expected_foo.set_query("foo");
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Only search in namespace1,uri1.
+   NamespaceDocumentUriGroup* uri1_filter =
+       suggestion_spec.add_document_uri_filters();
+   uri1_filter->set_namespace_("namespace1");
+   uri1_filter->add_document_uris("uri1");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_fool)));
+
+   // Only search in namespace1,uri2.
+   suggestion_spec.clear_document_uri_filters();
+   NamespaceDocumentUriGroup* uri2_filter =
+       suggestion_spec.add_document_uri_filters();
+   uri2_filter->set_namespace_("namespace1");
+   uri2_filter->add_document_uris("uri2");
+
+   response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_foo)));
+ }
+
+ // A document uri filter naming several documents of one namespace limits the
+ // suggestions to the terms of exactly those documents.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_ShouldReturnInMultipleDocument) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // Three documents in namespace1: "fool" (uri1), "foo" (uri2), "fo" (uri3).
+   DocumentProto fool_email = DocumentBuilder()
+                                  .SetKey("namespace1", "uri1")
+                                  .SetSchema("Email")
+                                  .SetCreationTimestampMs(10)
+                                  .AddStringProperty("subject", "fool")
+                                  .Build();
+   DocumentProto foo_email = DocumentBuilder()
+                                 .SetKey("namespace1", "uri2")
+                                 .SetSchema("Email")
+                                 .SetCreationTimestampMs(10)
+                                 .AddStringProperty("subject", "foo")
+                                 .Build();
+   DocumentProto fo_email = DocumentBuilder()
+                                .SetKey("namespace1", "uri3")
+                                .SetSchema("Email")
+                                .SetCreationTimestampMs(10)
+                                .AddStringProperty("subject", "fo")
+                                .Build();
+   ASSERT_THAT(icing.Put(fool_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(foo_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(fo_email).status(), ProtoIsOk());
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Only search the documents namespace1,uri1 and namespace1,uri2.
+   NamespaceDocumentUriGroup* uri_filter =
+       suggestion_spec.add_document_uri_filters();
+   uri_filter->set_namespace_("namespace1");
+   uri_filter->add_document_uris("uri1");
+   uri_filter->add_document_uris("uri2");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+   SuggestionResponse::Suggestion expected_foo;
+   expected_foo.set_query("foo");
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_fool),
+                                    EqualsProto(expected_foo)));
+ }
+
+ // Namespace filters and document uri filters can be combined: a namespace
+ // with a uri filter is limited to the listed documents, while a namespace
+ // without one contributes all of its documents.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   // One document per namespace: "fool", "foo" and "fo" respectively.
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   DocumentProto namespace2_email = DocumentBuilder()
+                                        .SetKey("namespace2", "uri2")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "foo")
+                                        .Build();
+   DocumentProto namespace3_email = DocumentBuilder()
+                                        .SetKey("namespace3", "uri3")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fo")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace3_email).status(), ProtoIsOk());
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Only search the document namespace1,uri1 and all documents under
+   // namespace2.
+   suggestion_spec.add_namespace_filters("namespace1");
+   suggestion_spec.add_namespace_filters("namespace2");
+   NamespaceDocumentUriGroup* uri_filter =
+       suggestion_spec.add_document_uri_filters();
+   uri_filter->set_namespace_("namespace1");
+   uri_filter->add_document_uris("uri1");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+   SuggestionResponse::Suggestion expected_foo;
+   expected_foo.set_query("foo");
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_fool),
+                                    EqualsProto(expected_foo)));
+ }
+
+ // Filtering on a namespace/uri pair that was never indexed succeeds and
+ // returns no suggestions.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_DocumentIdDoesntExist) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   DocumentProto namespace2_email = DocumentBuilder()
+                                        .SetKey("namespace2", "uri2")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "foo")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_email).status(), ProtoIsOk());
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Search for a document id that does not exist: namespace3,uri3.
+   suggestion_spec.add_namespace_filters("namespace3");
+   NamespaceDocumentUriGroup* missing_document_filter =
+       suggestion_spec.add_document_uri_filters();
+   missing_document_filter->set_namespace_("namespace3");
+   missing_document_filter->add_document_uris("uri3");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+   ASSERT_THAT(response.suggestions(), IsEmpty());
+ }
+
+ // A document uri filter whose namespace is not among the namespace filters
+ // is rejected with INVALID_ARGUMENT.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   DocumentProto namespace2_email = DocumentBuilder()
+                                        .SetKey("namespace2", "uri2")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "foo")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(namespace2_email).status(), ProtoIsOk());
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Ask for the document namespace1,uri1 while the namespace filter only
+   // allows namespace2.
+   NamespaceDocumentUriGroup* uri_filter =
+       suggestion_spec.add_document_uri_filters();
+   uri_filter->set_namespace_("namespace1");
+   uri_filter->add_document_uris("uri1");
+   suggestion_spec.add_namespace_filters("namespace2");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+ }
+
+ // A document uri filter that names a namespace but lists no uris is rejected
+ // with INVALID_ARGUMENT.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_EmptyDocumentIdInNamespace) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+   ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+               ProtoIsOk());
+
+   DocumentProto namespace1_email = DocumentBuilder()
+                                        .SetKey("namespace1", "uri1")
+                                        .SetSchema("Email")
+                                        .SetCreationTimestampMs(10)
+                                        .AddStringProperty("subject", "fool")
+                                        .Build();
+   ASSERT_THAT(icing.Put(namespace1_email).status(), ProtoIsOk());
+
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+   // Give a uri filter for namespace1 that lists no document uris.
+   NamespaceDocumentUriGroup* empty_uri_filter =
+       suggestion_spec.add_document_uri_filters();
+   empty_uri_filter->set_namespace_("namespace1");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
+ }
+
+ // A schema type filter limits suggestions to documents of that type. The
+ // expected result includes the term held in the Email's nested "sender"
+ // Person document and excludes the term of the Message document.
+ TEST_F(IcingSearchEngineSuggestTest,
+        SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) {
+   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+   // Three types: Message (required "body"), Person (optional "name") and
+   // Email (optional nested "sender" Person plus optional "subject").
+   SchemaProto schema =
+       SchemaBuilder()
+           .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("body")
+                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                   .SetCardinality(CARDINALITY_REQUIRED)))
+           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+               PropertyConfigBuilder()
+                   .SetName("name")
+                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                   .SetCardinality(CARDINALITY_OPTIONAL)))
+           .AddType(
+               SchemaTypeConfigBuilder()
+                   .SetType("Email")
+                   .AddProperty(
+                       PropertyConfigBuilder()
+                           .SetName("sender")
+                           .SetDataTypeDocument(
+                               "Person", /*index_nested_properties=*/true)
+                           .SetCardinality(CARDINALITY_OPTIONAL))
+                   .AddProperty(
+                       PropertyConfigBuilder()
+                           .SetName("subject")
+                           .SetDataTypeString(TERM_MATCH_PREFIX,
+                                              TOKENIZER_PLAIN)
+                           .SetCardinality(CARDINALITY_OPTIONAL)))
+           .Build();
+   ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+   // The Email holds "fool" in its subject and "foo" in its nested sender;
+   // the Message holds "fo".
+   DocumentProto email =
+       DocumentBuilder()
+           .SetKey("namespace1", "uri1")
+           .SetSchema("Email")
+           .SetCreationTimestampMs(10)
+           .AddStringProperty("subject", "fool")
+           .AddDocumentProperty("sender",
+                                DocumentBuilder()
+                                    .SetKey("namespace", "uri1-sender")
+                                    .SetSchema("Person")
+                                    .AddStringProperty("name", "foo")
+                                    .Build())
+           .Build();
+   DocumentProto message = DocumentBuilder()
+                               .SetKey("namespace1", "uri2")
+                               .SetSchema("Message")
+                               .SetCreationTimestampMs(10)
+                               .AddStringProperty("body", "fo")
+                               .Build();
+   ASSERT_THAT(icing.Put(email).status(), ProtoIsOk());
+   ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
+
+   // Restrict the suggestion search to the Email type.
+   SuggestionSpecProto suggestion_spec;
+   suggestion_spec.set_prefix("f");
+   suggestion_spec.set_num_to_return(10);
+   SuggestionScoringSpecProto* scoring_spec =
+       suggestion_spec.mutable_scoring_spec();
+   scoring_spec->set_scoring_match_type(TermMatchType::PREFIX);
+   scoring_spec->set_rank_by(
+       SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+   suggestion_spec.add_schema_type_filters("Email");
+
+   SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+   ASSERT_THAT(response.status(), ProtoIsOk());
+
+   SuggestionResponse::Suggestion expected_fool;
+   expected_fool.set_query("fool");
+   SuggestionResponse::Suggestion expected_foo;
+   expected_foo.set_query("foo");
+   ASSERT_THAT(response.suggestions(),
+               UnorderedElementsAre(EqualsProto(expected_foo),
+                                    EqualsProto(expected_fool)));
+ }
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_SchemaTypeNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");  // Not in the schema.
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_ShouldReturnInDesiredProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .AddStringProperty("emailAddress", "fo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Only search in Email.subject, whose only term in document1 is "fool".
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Email");
+ mask->add_paths("subject");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // Search in subject and sender.name; sender.emailAddress is still not listed.
+ suggestion_spec.clear_type_property_filters();
+ mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Email");
+ mask->add_paths("subject");
+ mask->add_paths("sender.name");
+
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFool)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_NestedPropertyReturnNothing) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // Only search in Person.name; the only Person here is nested in an Email.
+ suggestion_spec.add_schema_type_filters("Person");
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
+ mask->set_schema_type("Person");
+ mask->add_paths("name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyFilterAndSchemaFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "fool")
+ .AddDocumentProperty("sender", DocumentBuilder()
+ .SetKey("namespace", "uri1-sender")
+ .SetSchema("Person")
+ .AddStringProperty("name", "foo")
+ .Build())
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFoo;
+ suggestionFoo.set_query("foo");
+ SuggestionResponse::Suggestion suggestionFo;
+ suggestionFo.set_query("fo");
+
+ // Search only sender.name of Email (subject unlisted) and all of Message.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Email");
+ suggestion_spec.add_schema_type_filters("Message");
+ TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
+ mask1->set_schema_type("Email");
+ mask1->add_paths("sender.name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFoo),
+ EqualsProto(suggestionFo)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("body", "fo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // The property mask targets Email.sender.name, but the schema type filter
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+ suggestion_spec.add_schema_type_filters("Message");  // ...is Message only.
+ TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
+ mask1->set_schema_type("Email");
+ mask1->add_paths("sender.name");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ EXPECT_THAT(response.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_OrderByTermFrequency) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Message")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty(
+ "body", "termthree termthree termthree termtwo termtwo termone")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Suggest terms with prefix "t", ranked by term frequency.
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("t");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::EXACT_ONLY);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY);
+
+ SuggestionResponse::Suggestion suggestionTermOne;
+ suggestionTermOne.set_query("termone");
+ SuggestionResponse::Suggestion suggestionTermTwo;
+ suggestionTermTwo.set_query("termtwo");
+ SuggestionResponse::Suggestion suggestionTermThree;
+ suggestionTermThree.set_query("termthree");
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ ElementsAre(EqualsProto(suggestionTermThree),
+ EqualsProto(suggestionTermTwo),
+ EqualsProto(suggestionTermOne)));
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_ExpiredTest) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(500)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .AddStringProperty("subject", "fool")
+ .Build();
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(400);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ // namespace1 has this suggestion
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+
+ // namespace2 has this suggestion
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+ }
+ // Reinitialize with the fake clock set later, so that document1 has expired.
+ {
+ // Time needs to be past document1 creation time (100) + ttl (500) for it
+ // to count as "expired". document2 is not expired since its ttl is 1000.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(800);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("f");
+ suggestion_spec.add_namespace_filters("namespace1");
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ // The spec already filters on namespace1 only; its sole document
+ // (document1) is past its ttl, so namespace1 now returns empty.
+ SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(), IsEmpty());
+
+ // namespace2 still has this suggestion
+ SuggestionResponse::Suggestion suggestionFool;
+ suggestionFool.set_query("fool");
+
+ suggestion_spec.clear_namespace_filters();
+ suggestion_spec.add_namespace_filters("namespace2");
+ response = icing.SearchSuggestions(suggestion_spec);
+ ASSERT_THAT(response.status(), ProtoIsOk());
+ ASSERT_THAT(response.suggestions(),
+ UnorderedElementsAre(EqualsProto(suggestionFool)));
+ }
+}
+
+TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_emptyPrefix) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("");  // Empty prefix: expected to be rejected.
+ suggestion_spec.set_num_to_return(10);
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineSuggestTest,
+ SearchSuggestionsTest_NonPositiveNumToReturn) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SuggestionSpecProto suggestion_spec;
+ suggestion_spec.set_prefix("prefix");
+ suggestion_spec.set_num_to_return(0);  // Zero: expected to be rejected.
+ suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
+ TermMatchType::PREFIX);
+ suggestion_spec.mutable_scoring_spec()->set_rank_by(
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
+
+ ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 5ec78a0..e0070e0 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -27,8 +27,6 @@
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
#include "icing/jni/jni-cache.h"
-#include "icing/join/join-processor.h"
-#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
@@ -47,19 +45,13 @@
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/proto/usage.pb.h"
-#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
-#include "icing/schema/schema-store.h"
-#include "icing/schema/section.h"
-#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/random-string.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
-#include "icing/util/snippet-helpers.h"
namespace icing {
namespace lib {
@@ -67,54 +59,16 @@ namespace lib {
namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
-using ::testing::_;
-using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
-using ::testing::Le;
-using ::testing::Lt;
-using ::testing::Matcher;
-using ::testing::Ne;
using ::testing::Return;
using ::testing::SizeIs;
using ::testing::StrEq;
using ::testing::UnorderedElementsAre;
-constexpr std::string_view kIpsumText =
- "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
- "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
- "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
- "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
- "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
- "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
- "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
- "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
- "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
- "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
- "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
- "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
- "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
- "placerat semper.";
-
-PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
- Filesystem filesystem, const std::string& file_path) {
- PortableFileBackedProtoLog<DocumentWrapper>::Header header;
- filesystem.PRead(file_path.c_str(), &header,
- sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
- /*offset=*/0);
- return header;
-}
-
-void WriteDocumentLogHeader(
- Filesystem filesystem, const std::string& file_path,
- PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
- filesystem.Write(file_path.c_str(), &header,
- sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
-}
-
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
@@ -130,6 +84,8 @@ class TestIcingSearchEngine : public IcingSearchEngine {
std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
+// This fixture covers tests of IcingSearchEngine APIs that are not
+// specifically covered by the other IcingSearchEngine*Test fixtures.
class IcingSearchEngineTest : public testing::Test {
protected:
void SetUp() override {
@@ -158,21 +114,9 @@ class IcingSearchEngineTest : public testing::Test {
Filesystem filesystem_;
};
-constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;
-
// Non-zero value so we don't override it to be the current time
constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
-std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
-
-std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
-
-std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
-
-std::string GetHeaderFilename() {
- return GetTestBaseDir() + "/icing_search_engine_header";
-}
-
IcingSearchEngineOptions GetDefaultIcingOptions() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestBaseDir());
@@ -188,19 +132,6 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
.Build();
}
-DocumentProto CreateEmailDocument(const std::string& name_space,
- const std::string& uri, int score,
- const std::string& subject_content,
- const std::string& body_content) {
- return DocumentBuilder()
- .SetKey(name_space, uri)
- .SetSchema("Email")
- .SetScore(score)
- .AddStringProperty("subject", subject_content)
- .AddStringProperty("body", body_content)
- .Build();
-}
-
SchemaProto CreateMessageSchema() {
return SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
@@ -211,23 +142,6 @@ SchemaProto CreateMessageSchema() {
.Build();
}
-SchemaProto CreateEmailSchema() {
- return SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
-}
-
SchemaProto CreatePersonAndEmailSchema() {
return SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -280,1642 +194,6 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri,
return usage_report;
}
-std::vector<std::string> GetUrisFromSearchResults(
- SearchResultProto& search_result_proto) {
- std::vector<std::string> result_uris;
- result_uris.reserve(search_result_proto.results_size());
- for (int i = 0; i < search_result_proto.results_size(); i++) {
- result_uris.push_back(
- search_result_proto.mutable_results(i)->document().uri());
- }
- return result_uris;
-}
-
-TEST_F(IcingSearchEngineTest, SimpleInitialization) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, InitializingAgainSavesNonPersistedData) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document;
-
- ASSERT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, NegativeMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(-1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMergeSizeReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(0);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // One is fine, if a bit weird. It just means that the lite index will be
- // smaller and will request a merge any time content is added to it.
- options.set_index_merge_size(1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_token_length(-1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ZeroMaxTokenLenReturnsInvalidArgument) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_max_token_length(0);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // A length of 1 is allowed - even though it would be strange to want
- // this.
- options.set_max_token_length(1);
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // "message" should have been truncated to "m"
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- // The indexed tokens were truncated to length of 1, so "m" will match
- search_spec.set_query("m");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // The query token is also truncated to length of 1, so "me"->"m" matches "m"
- search_spec.set_query("me");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // The query token is still truncated to length of 1, so "massage"->"m"
- // matches "m"
- search_spec.set_query("massage");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- // Set token length to max. This is allowed (it just means never to
- // truncate tokens). However, this does mean that tokens that exceed the
- // size of the lexicon will cause indexing to fail.
- options.set_max_token_length(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add a document that just barely fits under the max document limit.
- // This will still fail to index because we won't actually have enough
- // room in the lexicon to fit this content.
- std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", std::move(enormous_string))
- .Build();
- EXPECT_THAT(icing.Put(document).status(),
- ProtoStatusIs(StatusProto::OUT_OF_SPACE));
-
- SearchSpecProto search_spec;
- search_spec.set_query("p");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails DocumentStore::Create()
- ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_))
- .WillByDefault(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(),
- ProtoStatusIs(StatusProto::INTERNAL));
- EXPECT_THAT(initialize_result_proto.status().message(),
- HasSubstr("Could not create directory"));
-}
-
-TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresAtThreshold) {
- Filesystem filesystem;
- DocumentProto email1 =
- CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
- email1.set_creation_timestamp_ms(10000);
- DocumentProto email2 =
- CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
- email2.set_creation_timestamp_ms(10000);
-
- {
- // Create an index with a few documents.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- }
-
- // Write an init marker file with 5 previously failed attempts.
- std::string marker_filepath = GetTestBaseDir() + "/init_marker";
-
- {
- ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
- int network_init_attempts = GHostToNetworkL(5);
- // Write the updated number of attempts before we get started.
- ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
- &network_init_attempts,
- sizeof(network_init_attempts)));
- ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
- }
-
- {
- // Create the index again and verify that initialization succeeds and no
- // data is thrown out.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(5));
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
- .document(),
- EqualsProto(email1));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .document(),
- EqualsProto(email2));
- }
-
- // The successful init should have thrown out the marker file.
- ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresBeyondThreshold) {
- Filesystem filesystem;
- DocumentProto email1 =
- CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
- DocumentProto email2 =
- CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
-
- {
- // Create an index with a few documents.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- }
-
- // Write an init marker file with 6 previously failed attempts.
- std::string marker_filepath = GetTestBaseDir() + "/init_marker";
-
- {
- ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
- int network_init_attempts = GHostToNetworkL(6);
- // Write the updated number of attempts before we get started.
- ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
- &network_init_attempts,
- sizeof(network_init_attempts)));
- ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
- }
-
- {
- // Create the index again and verify that initialization succeeds and all
- // data is thrown out.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(),
- ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(6));
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- }
-
- // The successful init should have thrown out the marker file.
- ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, SuccessiveInitFailuresIncrementsInitMarker) {
- Filesystem filesystem;
- DocumentProto email1 =
- CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
- DocumentProto email2 =
- CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
-
- {
- // 1. Create an index with a few documents.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoIsOk());
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- }
-
- {
- // 2. Create an index that will encounter an IO failure when trying to
- // create the document log.
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- std::string document_log_filepath =
- icing_options.base_dir() + "/document_dir/document_log_v1";
- auto get_filesize_lambda = [this,
- &document_log_filepath](const char* filename) {
- if (strncmp(document_log_filepath.c_str(), filename,
- document_log_filepath.length()) == 0) {
- return Filesystem::kBadFileSize;
- }
- return this->filesystem()->GetFileSize(filename);
- };
- ON_CALL(*mock_filesystem, GetFileSize(A<const char*>()))
- .WillByDefault(get_filesize_lambda);
-
- TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(),
- GetTestJniCache());
-
- // Fail to initialize six times in a row.
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(0));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(1));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(2));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(3));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(4));
-
- init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(5));
- }
-
- {
- // 3. Create the index again and verify that initialization succeeds and all
- // data is thrown out.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- ASSERT_THAT(init_result.status(),
- ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
- Eq(6));
-
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- }
-
- // The successful init should have thrown out the marker file.
- std::string marker_filepath = GetTestBaseDir() + "/init_marker";
- ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest,
- CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
- // Create a type config with a circular reference.
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("Message");
-
- auto* body = type->add_properties();
- body->set_property_name("recipient");
- body->set_schema_type("Person");
- body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- type = schema.add_types();
- type->set_schema_type("Person");
-
- body = type->add_properties();
- body->set_property_name("recipient");
- body->set_schema_type("Message");
- body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, PutWithoutSchemaFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
-}
-
-TEST_F(IcingSearchEngineTest, FailToReadSchema) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- {
- // Successfully initialize and set a schema
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- }
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
-
- // This fails FileBackedProto::Read() when we try to check the schema we
- // had previously set
- ON_CALL(*mock_filesystem,
- OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
- .WillByDefault(Return(-1));
-
- TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(),
- GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = test_icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(),
- ProtoStatusIs(StatusProto::INTERNAL));
- EXPECT_THAT(initialize_result_proto.status().message(),
- HasSubstr("Unable to open file for read"));
-}
-
-TEST_F(IcingSearchEngineTest, FailToWriteSchema) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails FileBackedProto::Write()
- ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb")))
- .WillByDefault(Return(-1));
-
- TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SetSchemaResultProto set_schema_result_proto =
- icing.SetSchema(CreateMessageSchema());
- EXPECT_THAT(set_schema_result_proto.status(),
- ProtoStatusIs(StatusProto::INTERNAL));
- EXPECT_THAT(set_schema_result_proto.status().message(),
- HasSubstr("Unable to open file for write"));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleFails) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with properties { "title", "body"}
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- // 2. Add an email document
- DocumentProto doc = DocumentBuilder()
- .SetKey("emails", "email#1")
- .SetSchema("Email")
- .AddStringProperty("title", "Hello world.")
- .AddStringProperty("body", "Goodnight Moon.")
- .Build();
- EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 3. Set a schema that deletes email. This should fail.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(
- icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- // 4. Try to delete by email type. This should succeed because email wasn't
- // deleted in step 3.
- EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleForceOverrideSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with properties { "title", "body"}
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- // 2. Add an email document
- DocumentProto doc = DocumentBuilder()
- .SetKey("emails", "email#1")
- .SetSchema("Email")
- .AddStringProperty("title", "Hello world.")
- .AddStringProperty("body", "Goodnight Moon.")
- .Build();
- EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 3. Set a schema that deletes email with force override. This should
- // succeed and delete the email type.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
-
- // 4. Try to delete by email type. This should fail because email was
- // already deleted.
- EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaUnsetVersionIsZero) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that adds a new optional property and updates version.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // 3. SetSchema should succeed and the version number should be updated.
- SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_fully_compatible_changed_schema_types()
- ->Add("Email");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleVersionUpdateFails) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // 3. SetSchema should fail and the version number should NOT be updated.
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
- // with force override to true.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // 3. SetSchema should succeed and the version number should be updated.
- EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaNoChangeVersionUpdateSucceeds) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 1. Create a schema with an Email type with version 1
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(1);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 2. Create schema that only changes the version.
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_version(2);
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // 3. SetSchema should succeed and the version number should be updated.
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaDuplicateTypesReturnsAlreadyExists) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create a schema with types { "Email", "Message" and "Email" }
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- type = schema.add_types();
- type->set_schema_type("Message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- *schema.add_types() = schema.types(0);
-
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::ALREADY_EXISTS));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaDuplicatePropertiesReturnsAlreadyExists) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create a schema with an Email type with properties { "title", "body" and
- // "title" }
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(),
- ProtoStatusIs(StatusProto::ALREADY_EXISTS));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchema) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- auto message_document = CreateMessageDocument("namespace", "uri");
-
- auto schema_with_message = CreateMessageSchema();
-
- SchemaProto schema_with_email;
- SchemaTypeConfigProto* type = schema_with_email.add_types();
- type->set_schema_type("Email");
- PropertyConfigProto* property = type->add_properties();
- property->set_property_name("title");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- SchemaProto schema_with_email_and_message = schema_with_email;
- type = schema_with_email_and_message.add_types();
- type->set_schema_type("Message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Create an arbitrary invalid schema
- SchemaProto invalid_schema;
- SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
- empty_type->set_schema_type("");
-
- // Make sure we can't set invalid schemas
- SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema);
- EXPECT_THAT(set_schema_result.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
-
- // Can add an document of a set schema
- set_schema_result = icing.SetSchema(schema_with_message);
- EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
-
- // Schema with Email doesn't have Message, so would result incompatible
- // data
- set_schema_result = icing.SetSchema(schema_with_email);
- EXPECT_THAT(set_schema_result.status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
-
- // Can expand the set of schema types and add an document of a new
- // schema type
- set_schema_result = icing.SetSchema(schema_with_email_and_message);
- EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
- EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
-
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
- // Can't add an document whose schema isn't set
- auto photo_document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Photo")
- .AddStringProperty("creator", "icing")
- .Build();
- PutResultProto put_result_proto = icing.Put(photo_document);
- EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
- EXPECT_THAT(put_result_proto.status().message(),
- HasSubstr("'Photo' not found"));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
- schema_with_no_indexed_property.mutable_types(0)
- ->mutable_properties(0)
- ->clear_string_indexing_config();
-
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(schema_with_no_indexed_property);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Message");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Nothing will be index and Search() won't return anything.
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-
- SchemaProto schema_with_indexed_property = CreateMessageSchema();
- // Index restoration should be triggered here because new schema requires more
- // properties to be indexed.
- set_schema_result = icing.SetSchema(schema_with_indexed_property);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Message");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaTypeConfigProto person_proto =
- SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .Build();
- SchemaProto nested_schema =
- SchemaBuilder()
- .AddType(person_proto)
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_new_schema_types()->Add("Person");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Bill Lundbergh")
- .Build())
- .Build();
-
- // "sender.name" should get assigned property id 0 and subject should get
- // property id 1.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // document should match a query for 'Bill' in 'sender.name', but not in
- // 'subject'
- SearchSpecProto search_spec;
- search_spec.set_query("sender.name:Bill");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- search_spec.set_query("subject:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-
- // Now update the schema with index_nested_properties=false. This should
- // reassign property ids, lead to an index rebuild and ensure that nothing
- // match a query for "Bill".
- SchemaProto no_nested_schema =
- SchemaBuilder()
- .AddType(person_proto)
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/false)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- set_schema_result = icing.SetSchema(no_nested_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // document shouldn't match a query for 'Bill' in either 'sender.name' or
- // 'subject'
- search_spec.set_query("sender.name:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-
- search_spec.set_query("subject:Bill");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-}
-
-TEST_F(IcingSearchEngineTest,
- ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 'body' should have a property id of 0 and 'subject' should have a property
- // id of 1.
- SchemaProto email_with_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(email_with_body_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Create a document with only a subject property.
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- SearchSpecProto search_spec;
- search_spec.set_query("subject:tps");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-
- // Now update the schema to remove the 'body' field. This is backwards
- // incompatible, but document should be preserved because it doesn't contain a
- // 'body' field. If the index is correctly rebuilt, then 'subject' will now
- // have a property id of 0. If not, then the hits in the index will still have
- // have a property id of 1 and therefore it won't be found.
- SchemaProto email_no_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- set_schema_result = icing.SetSchema(
- email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- search_spec.set_query("subject:tps");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-}
-
-TEST_F(
- IcingSearchEngineTest,
- ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // 'body' should have a property id of 0 and 'subject' should have a property
- // id of 1.
- SchemaProto email_with_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(email_with_body_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Create a document with only a subject property.
- DocumentProto document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- SearchSpecProto search_spec;
- search_spec.set_query("subject:tps");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto result;
- result.mutable_status()->set_code(StatusProto::OK);
- *result.mutable_results()->Add()->mutable_document() = document;
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-
- // Now update the schema to remove the 'body' field. This is backwards
- // incompatible, but document should be preserved because it doesn't contain a
- // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to'
- // will now have property ids of 0 and 1 respectively. If not, then the hits
- // in the index will still have have a property id of 1 and therefore it won't
- // be found.
- SchemaProto email_no_body_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("to")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- set_schema_result = icing.SetSchema(
- email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // We should be able to retrieve the document by searching for 'tps' in
- // 'subject'.
- search_spec.set_query("subject:tps");
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
-}
-
-TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaTypeConfigProto email_schema_type =
- SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument("Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .Build();
- SchemaProto nested_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("company")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(email_schema_type)
- .Build();
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_new_schema_types()->Add("Email");
- expected_set_schema_result.mutable_new_schema_types()->Add("Person");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Create two documents - a person document and an email document - both docs
- // should be deleted when we remove the 'company' field from the person type.
- DocumentProto person_document =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Person")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("name", "Bill Lundbergh")
- .AddStringProperty("company", "Initech Corp.")
- .Build();
- EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
-
- DocumentProto email_document =
- DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(1000)
- .AddStringProperty("subject",
- "Did you get the memo about TPS reports?")
- .AddDocumentProperty("sender", person_document)
- .Build();
- EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
-
- // We should be able to retrieve both documents.
- GetResultProto get_result =
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoIsOk());
- EXPECT_THAT(get_result.document(), EqualsProto(person_document));
-
- get_result =
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoIsOk());
- EXPECT_THAT(get_result.document(), EqualsProto(email_document));
-
- // Now update the schema to remove the 'company' field. This is backwards
- // incompatible, *both* documents should be deleted because both fail
- // validation (they each contain a 'Person' that has a non-existent property).
- nested_schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(email_schema_type)
- .Build();
-
- set_schema_result = icing.SetSchema(
- nested_schema, /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person");
- expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Email");
- expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
- ->Add("Person");
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- // Both documents should be deleted now.
- get_result =
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
-
- get_result =
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
- EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
-}
-
-// TODO(b/256022027): add unit tests for join incompatible schema change to make
-// sure the joinable cache is rebuilt correctly.
-
-TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema_with_optional_subject;
- auto type = schema_with_optional_subject.add_types();
- type->set_schema_type("email");
-
- // Add a OPTIONAL property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(),
- ProtoIsOk());
-
- DocumentProto email_document_without_subject =
- DocumentBuilder()
- .SetKey("namespace", "without_subject")
- .SetSchema("email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto email_document_with_subject =
- DocumentBuilder()
- .SetKey("namespace", "with_subject")
- .SetSchema("email")
- .AddStringProperty("subject", "foo")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk());
-
- SchemaProto schema_with_required_subject;
- type = schema_with_required_subject.add_types();
- type->set_schema_type("email");
-
- // Add a REQUIRED property
- property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Can't set the schema since it's incompatible
- SetSchemaResultProto set_schema_result =
- icing.SetSchema(schema_with_required_subject);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result_proto;
- expected_set_schema_result_proto.mutable_status()->set_code(
- StatusProto::FAILED_PRECONDITION);
- expected_set_schema_result_proto.mutable_status()->set_message(
- "Schema is incompatible.");
- expected_set_schema_result_proto.add_incompatible_schema_types("email");
-
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
-
- // Force set it
- set_schema_result =
- icing.SetSchema(schema_with_required_subject,
- /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result_proto.mutable_status()->clear_message();
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document_with_subject;
-
- EXPECT_THAT(icing.Get("namespace", "with_subject",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // The document without a subject got deleted because it failed validation
- // against the new schema
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, without_subject) not found.");
- expected_get_result_proto.clear_document();
-
- EXPECT_THAT(icing.Get("namespace", "without_subject",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto email_document =
- DocumentBuilder()
- .SetKey("namespace", "email_uri")
- .SetSchema("email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto message_document =
- DocumentBuilder()
- .SetKey("namespace", "message_uri")
- .SetSchema("message")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
-
- // Clear the schema and only add the "email" type, essentially deleting the
- // "message" type
- SchemaProto new_schema;
- type = new_schema.add_types();
- type->set_schema_type("email");
-
- // Can't set the schema since it's incompatible
- SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_result;
- expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
- expected_result.mutable_status()->set_message("Schema is incompatible.");
- expected_result.add_deleted_schema_types("message");
-
- EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
-
- // Force set it
- set_schema_result =
- icing.SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- expected_result.mutable_status()->set_code(StatusProto::OK);
- expected_result.mutable_status()->clear_message();
- EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
-
- // "email" document is still there
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document;
-
- EXPECT_THAT(icing.Get("namespace", "email_uri",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // "message" document got deleted
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, message_uri) not found.");
- expected_get_result_proto.clear_document();
-
- EXPECT_THAT(icing.Get("namespace", "message_uri",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.GetSchema().status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- GetSchemaResultProto expected_get_schema_result_proto;
- expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
- EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaTypeFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- GetSchemaTypeResultProto get_schema_type_result_proto =
- icing.GetSchemaType("nonexistent_schema");
- EXPECT_THAT(get_schema_type_result_proto.status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(get_schema_type_result_proto.status().message(),
- HasSubstr("Schema not set"));
-}
-
-TEST_F(IcingSearchEngineTest, GetSchemaTypeOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- GetSchemaTypeResultProto expected_get_schema_type_result_proto;
- expected_get_schema_type_result_proto.mutable_status()->set_code(
- StatusProto::OK);
- *expected_get_schema_type_result_proto.mutable_schema_type_config() =
- CreateMessageSchema().types(0);
- EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()),
- EqualsProto(expected_get_schema_type_result_proto));
-}
-
TEST_F(IcingSearchEngineTest, GetDocument) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -2150,3425 +428,6 @@ TEST_F(IcingSearchEngineTest,
EqualsProto(expected_get_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(2));
-
- const DocumentProto& document = results.results(0).document();
- EXPECT_THAT(document, EqualsProto(document_two));
-
- const SnippetProto& snippet = results.results(0).snippet();
- EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
- std::string_view content =
- GetString(&document, snippet.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
-
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
-
- search_spec.set_query("foo");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsScoresDocumentScore) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- document_one.set_score(93);
- document_one.set_creation_timestamp_ms(10000);
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- document_two.set_score(15);
- document_two.set_creation_timestamp_ms(12000);
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Rank by DOCUMENT_SCORE and ensure that the score field is populated with
- // document score.
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- SearchResultProto results = icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(2));
-
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
- EXPECT_THAT(results.results(0).score(), 93);
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(1).score(), 15);
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsScoresCreationTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- document_one.set_score(93);
- document_one.set_creation_timestamp_ms(10000);
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- document_two.set_score(15);
- document_two.set_creation_timestamp_ms(12000);
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Rank by CREATION_TS and ensure that the score field is populated with
- // creation ts.
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
-
- SearchResultProto results = icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(2));
-
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(0).score(), 12000);
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(results.results(1).score(), 10000);
-}
-
-TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document_two;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
-
- EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
- Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
- Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
- Eq(1000));
-
- // The token is a random number so we don't verify it.
- expected_search_result_proto.set_next_page_token(
- search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(0);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(-5);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(
- StatusProto::INVALID_ARGUMENT);
- expected_search_result_proto.mutable_status()->set_message(
- "ResultSpecProto.num_per_page cannot be negative.");
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("");
-
- ResultSpecProto result_spec;
- result_spec.set_num_total_bytes_per_page_threshold(-1);
-
- SearchResultProto actual_results1 =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results1.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-
- result_spec.set_num_total_bytes_per_page_threshold(0);
- SearchResultProto actual_results2 =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results2.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- {
- // Set the schema up beforehand.
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- // Schema will be persisted to disk when icing goes out of scope.
- }
-
- {
- // Ensure that icing initializes the schema and section_manager
- // properly from the pre-existing file.
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- // The index and document store will be persisted to disk when icing goes
- // out of scope.
- }
-
- {
- // Ensure that the index is brought back up without problems and we
- // can query for the content that we expect.
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- search_spec.set_query("foo");
-
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- // Empty result, no next-page token
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
-
- EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(),
- Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000));
- EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0));
- EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(),
- Eq(0));
- EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(),
- Eq(1000));
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
-
- // Searches and gets the first page, 2 results
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Second page, 2 results
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Third page, 1 result
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- // Because there are no more results, we should not return the next page
- // token.
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // No more results
- expected_search_result_proto.clear_results();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
-
- // Searches and gets the first page, 2 results
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Second page, 2 results
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Third page, 1 result
- expected_search_result_proto.clear_results();
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- // Because there are no more results, we should not return the next page
- // token.
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // No more results
- expected_search_result_proto.clear_results();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchWithUnknownEnabledFeatureShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
- search_spec.add_enabled_features("BAD_FEATURE");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
-
- // Searches and gets the first page, 2 results with 2 snippets
- SearchResultProto search_result =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- const DocumentProto& document_result_1 = search_result.results(0).document();
- EXPECT_THAT(document_result_1, EqualsProto(document5));
- const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
- EXPECT_THAT(snippet_result_1.entries(), SizeIs(1));
- EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &document_result_1, snippet_result_1.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
- ElementsAre("message"));
-
- const DocumentProto& document_result_2 = search_result.results(1).document();
- EXPECT_THAT(document_result_2, EqualsProto(document4));
- const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
- EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
- content = GetString(&document_result_2,
- snippet_result_2.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
- ElementsAre("message"));
-
- // Second page, 2 result with 1 snippet
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- const DocumentProto& document_result_3 = search_result.results(0).document();
- EXPECT_THAT(document_result_3, EqualsProto(document3));
- const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
- EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
- content = GetString(&document_result_3,
- snippet_result_3.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
- ElementsAre("message body"));
- EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
- ElementsAre("message"));
-
- EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
- EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
-
- // Third page, 1 result with 0 snippets
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(1));
- ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
-
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
- EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Searches and gets the first page, 1 result
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- // Now document1 is still to be fetched.
-
- // Invalidates token
- icing.InvalidateNextPageToken(next_page_token);
-
- // Tries to fetch the second page, no result since it's invalidated
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- AllPageTokensShouldBeInvalidatedAfterOptimization) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Searches and gets the first page, 1 result
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken));
- uint64_t next_page_token = search_result_proto.next_page_token();
- // Since the token is a random number, we don't need to verify
- expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- // Now document1 is still to be fetched.
-
- OptimizeResultProto optimize_result_proto;
- optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
- optimize_result_proto.mutable_status()->set_message("");
- OptimizeResultProto actual_result = icing.Optimize();
- actual_result.clear_optimize_stats();
- ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto));
-
- // Tries to fetch the second page, no results since all tokens have been
- // invalidated during Optimize()
- expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
- search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
-
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- {
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
- const std::string document_log_path =
- icing_options.base_dir() + "/document_dir/" +
- DocumentLogCreator::GetDocumentLogFilename();
- int64_t document_log_size_before =
- filesystem()->GetFileSize(document_log_path.c_str());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
- int64_t document_log_size_after =
- filesystem()->GetFileSize(document_log_path.c_str());
-
- // Validates that document can't be found right after Optimize()
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- // Validates that document is actually removed from document log
- EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) {
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
- IcingSearchEngine icing(icing_options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Create a tmp dir that will be used in Optimize() to swap files,
- // this validates that any tmp dirs will be deleted before using.
- const std::string tmp_dir =
- icing_options.base_dir() + "/document_dir_optimize_tmp";
-
- const std::string tmp_file = tmp_dir + "/file";
- ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str()));
- ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
- fd.reset();
-
- EXPECT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
- EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
-}
-
-TEST_F(IcingSearchEngineTest, GetOptimizeInfoHasCorrectStats) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(1000);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Just initialized, nothing is optimizable yet.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Only have active documents, nothing is optimizable yet.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
-
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
-
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- int64_t first_estimated_optimizable_bytes =
- optimize_info.estimated_optimizable_bytes();
-
- // Add a second document, but it'll be expired since the time (1000) is
- // greater than the document's creation timestamp (100) + the document's ttl
- // (500)
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
- Gt(first_estimated_optimizable_bytes));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
-
- // Optimize
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
- }
-
- {
- // Recreate with new time
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(5000);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Nothing is optimizable now that everything has been optimized away.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000));
- }
-}
-
-TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Get() and Put() are good right after Optimize()
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk());
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- *expected_get_result_proto.mutable_document() = document4;
- EXPECT_THAT(
- icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest,
- GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) {
- DocumentProto empty_document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto empty_document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto empty_document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri3")
- .SetSchema("Message")
- .AddStringProperty("body", "")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Get() and Put() are good right after Optimize()
- *expected_get_result_proto.mutable_document() = empty_document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Delete() works right after Optimize()
- EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(
- StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri1) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri2) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) {
- // Setup filesystem to fail
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- bool just_swapped_files = false;
- auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) {
- if (just_swapped_files) {
- // We should fail the first call immediately after swapping files.
- just_swapped_files = false;
- return false;
- }
- return filesystem()->CreateDirectoryRecursively(dir_name);
- };
- ON_CALL(*mock_filesystem, CreateDirectoryRecursively)
- .WillByDefault(create_dir_lambda);
-
- auto swap_lambda = [&just_swapped_files](const char* first_dir,
- const char* second_dir) {
- just_swapped_files = true;
- return false;
- };
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
- HasSubstr("document_dir")))
- .WillByDefault(swap_lambda);
- TestIcingSearchEngine icing(options, std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // The mocks should cause an unrecoverable error during Optimize - returning
- // INTERNAL.
- ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
-
- // Ordinary operations should fail safely.
- SchemaProto simple_schema;
- auto type = simple_schema.add_types();
- type->set_schema_type("type0");
- auto property = type->add_properties();
- property->set_property_name("prop0");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- DocumentProto simple_doc = DocumentBuilder()
- .SetKey("namespace0", "uri0")
- .SetSchema("type0")
- .AddStringProperty("prop0", "foo")
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- ResultSpecProto result_spec;
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
-
- EXPECT_THAT(icing.SetSchema(simple_schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Put(simple_doc).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing
- .Get(simple_doc.namespace_(), simple_doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- // Reset should get icing back to a safe (empty) and working state.
- EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
- EXPECT_THAT(icing
- .Get(simple_doc.namespace_(), simple_doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
- SchemaProto schema;
- // Add an email type
- auto type = schema.add_types();
- type->set_schema_type("email");
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- // Add an message type
- type = schema.add_types();
- type->set_schema_type("message");
- property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("email")
- .AddStringProperty("subject", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete the first type. The first doc should be irretrievable. The
- // second should still be present.
- DeleteBySchemaTypeResultProto result_proto =
- icing.DeleteBySchemaType("message");
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteStatsProto exp_stats;
- exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(1);
- EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Search for "message", only document2 should show up.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("message");
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) {
- SchemaProto schema = CreateMessageSchema();
- // Add an email type
- SchemaProto tmp = CreateEmailSchema();
- *schema.add_types() = tmp.types(0);
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema(schema.types(0).schema_type())
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema(schema.types(1).schema_type())
- .AddStringProperty("subject", "subject subject2")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete the first type. The first doc should be irretrievable. The
- // second should still be present.
- SearchSpecProto search_spec;
- search_spec.add_schema_type_filters(schema.types(0).schema_type());
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace3", "uri3")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete namespace1. Document1 and document2 should be irretrievable.
- // Document3 should still be present.
- DeleteByNamespaceResultProto result_proto =
- icing.DeleteByNamespace("namespace1");
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteStatsProto exp_stats;
- exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(2);
- EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri2) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Search for "message", only document3 should show up.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("message");
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete the first namespace. The first doc should be irretrievable. The
- // second should still be present.
- SearchSpecProto search_spec;
- search_spec.add_namespace_filters("namespace1");
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByQuery) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete all docs containing 'body1'. The first doc should be irretrievable.
- // The second should still be present.
- SearchSpecProto search_spec;
- search_spec.set_query("body1");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteByQueryStatsProto exp_stats;
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(1);
- exp_stats.set_query_length(search_spec.query().length());
- exp_stats.set_num_terms(1);
- exp_stats.set_num_namespaces_filtered(0);
- exp_stats.set_num_schema_types_filtered(0);
- exp_stats.set_parse_query_latency_ms(7);
- exp_stats.set_document_removal_latency_ms(7);
- EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace1, uri1) not found.");
- expected_get_result_proto.clear_document();
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByQueryReturnInfo) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri3")
- .SetSchema("Message")
- .AddStringProperty("body", "message body3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(7);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document3;
- EXPECT_THAT(
- icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete all docs to test the information is correctly grouped.
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- DeleteByQueryResultProto result_proto =
- icing.DeleteByQuery(search_spec, true);
- EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteByQueryStatsProto exp_stats;
- exp_stats.set_latency_ms(7);
- exp_stats.set_num_documents_deleted(3);
- exp_stats.set_query_length(search_spec.query().length());
- exp_stats.set_num_terms(1);
- exp_stats.set_num_namespaces_filtered(0);
- exp_stats.set_num_schema_types_filtered(0);
- exp_stats.set_parse_query_latency_ms(7);
- exp_stats.set_document_removal_latency_ms(7);
- EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
-
- // Check that DeleteByQuery can return information for deleted documents.
- DeleteByQueryResultProto::DocumentGroupInfo info1, info2;
- info1.set_namespace_("namespace1");
- info1.set_schema("Message");
- info1.add_uris("uri1");
- info2.set_namespace_("namespace2");
- info2.set_schema("Message");
- info2.add_uris("uri3");
- info2.add_uris("uri2");
- EXPECT_THAT(result_proto.deleted_documents(),
- UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2)));
-
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
- EXPECT_THAT(
- icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance())
- .status()
- .code(),
- Eq(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) {
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Delete all docs containing 'foo', which should be none of them. Both docs
- // should still be present.
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(),
- ProtoStatusIs(StatusProto::NOT_FOUND));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- expected_get_result_proto.mutable_status()->clear_message();
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- search_spec = SearchSpecProto::default_instance();
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
- // Creates 3 test schemas
- SchemaProto schema1 = SchemaProto(CreateMessageSchema());
-
- SchemaProto schema2 = SchemaProto(schema1);
- auto new_property2 = schema2.mutable_types(0)->add_properties();
- new_property2->set_property_name("property2");
- new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property2->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property2->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- SchemaProto schema3 = SchemaProto(schema2);
- auto new_property3 = schema3.mutable_types(0)->add_properties();
- new_property3->set_property_name("property3");
- new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property3->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- new_property3->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that SetSchema() works right after Optimize()
- EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk());
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) {
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
-
- // Validates that Search() works right after Optimize()
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // Destroys IcingSearchEngine to make sure nothing is cached.
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- {
- // Initializes a normal icing to create files needed
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- }
-
- // Creates a mock filesystem in which DeleteDirectoryRecursively() always
- // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and makes
- // it return ABORTED_ERROR.
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem,
- DeleteDirectoryRecursively(HasSubstr("_optimize_tmp")))
- .WillByDefault(Return(false));
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED));
-
- // Now optimization is aborted, we verify that document-related functions
- // still work as expected.
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
-
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- OptimizationShouldRecoverIfFileDirectoriesAreMissing) {
- // Creates a mock filesystem in which SwapFiles() always fails and deletes the
- // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
- HasSubstr("document_dir")))
- .WillByDefault([this](const char* one, const char* two) {
- filesystem()->DeleteDirectoryRecursively(one);
- filesystem()->DeleteDirectoryRecursively(two);
- return false;
- });
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- // Optimize() fails due to filesystem error
- OptimizeResultProto result = icing.Optimize();
- EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- // Should rebuild the index for data loss.
- EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
- Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
-
- // Document is not found because original file directory is missing
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto new_document =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "new body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Searching old content returns nothing because original file directory is
- // missing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- search_spec.set_query("n");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- new_document;
-
- // Searching new content returns the new document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
- // Creates a mock filesystem in which SwapFiles() always fails and empties the
- // directories. This will fail IcingSearchEngine::OptimizeDocumentStore().
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"),
- HasSubstr("document_dir")))
- .WillByDefault([this](const char* one, const char* two) {
- filesystem()->DeleteDirectoryRecursively(one);
- filesystem()->CreateDirectoryRecursively(one);
- filesystem()->DeleteDirectoryRecursively(two);
- filesystem()->CreateDirectoryRecursively(two);
- return false;
- });
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- // Optimize() fails due to filesystem error
- OptimizeResultProto result = icing.Optimize();
- EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
- // Should rebuild the index for data loss.
- EXPECT_THAT(result.optimize_stats().index_restoration_mode(),
- Eq(OptimizeStatsProto::FULL_INDEX_REBUILD));
-
- // Document is not found because original files are missing
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- DocumentProto new_document =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "new body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("m");
- search_spec.set_term_match_type(TermMatchType::PREFIX);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Searching old content returns nothing because original files are missing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- search_spec.set_query("n");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- new_document;
-
- // Searching new content returns the new document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document;
-
- // Time just has to be less than the document's creation timestamp (100) + the
- // document's ttl (500)
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(400);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Check that the document is returned as part of search results
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .Build();
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // Time just has to be greater than the document's creation timestamp (100) +
- // the document's ttl (500)
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(700);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
-
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Check that the document is not returned as part of search results
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("message");
-
- auto property = type_config->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- DocumentProto message_document =
- DocumentBuilder()
- .SetKey("namespace", "message_uri")
- .SetSchema("message")
- .AddStringProperty("body", "foo")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk());
-
- // Make sure we can search for message document
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
-
- // The message isn't indexed, so we get nothing
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // With just the schema type filter, we can search for the message
- search_spec.Clear();
- search_spec.add_schema_type_filters("message");
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- message_document;
-
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
- // will force a change in the DocumentStore's cached SchemaTypeIds
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- // Adding a new indexed property will require reindexing
- type_config = schema.add_types();
- type_config->set_schema_type("message");
-
- property = type_config->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- search_spec.Clear();
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.add_schema_type_filters("message");
-
- // We can still search for the message document
- search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Checks that the index is still ok so we can search over it
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Checks that Schema is still since it'll be needed to validate the document
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) {
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- const std::string schema_file =
- absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
- const std::string corrupt_data = "1234";
- EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
- corrupt_data.size()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- const std::string corrupt_data = "1234";
- EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
- corrupt_data.data(), corrupt_data.size()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::INTERNAL));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2_with_additional_property =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("additional", "content")
- .AddStringProperty("body", "message body")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- {
- // Initializes folder and schema
- IcingSearchEngine icing(options, GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
- ProtoIsOk());
-
- // Won't get us anything because "additional" isn't marked as an indexed
- // property in the schema
- SearchSpecProto search_spec;
- search_spec.set_query("additional:content");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- {
- // This schema will change the SchemaTypeIds from the previous schema_
- // (since SchemaTypeIds are assigned based on order of the types, and this
- // new schema changes the ordering of previous types)
- SchemaProto new_schema;
- auto type = new_schema.add_types();
- type->set_schema_type("Email");
-
- type = new_schema.add_types();
- type->set_schema_type("Message");
-
- // Adding a new property changes the SectionIds (since SectionIds are
- // assigned based on alphabetical order of indexed sections, marking
- // "additional" as an indexed property will push the "body" property to a
- // different SectionId)
- auto property = type->add_properties();
- property->set_property_name("body");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- property = type->add_properties();
- property->set_property_name("additional");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- // Write the marker file
- std::string marker_filepath =
- absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
- ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
- ASSERT_TRUE(sfd.is_valid());
-
- // Write the new schema
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
- } // Will persist new schema
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // We can insert a Email document since we kept the new schema
- DocumentProto email_document =
- DocumentBuilder()
- .SetKey("namespace", "email_uri")
- .SetSchema("Email")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = email_document;
-
- EXPECT_THAT(icing.Get("namespace", "email_uri",
- GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- SearchSpecProto search_spec;
-
- // The section restrict will ensure we are using the correct, updated
- // SectionId in the Index
- search_spec.set_query("additional:content");
-
- // Schema type filter will ensure we're using the correct, updated
- // SchemaTypeId in the DocumentStore
- search_spec.add_schema_type_filters("Message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2_with_additional_property;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
-
- {
- // Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- {
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema()));
-
- // Puts a second document into DocumentStore but doesn't index it.
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
- schema_store.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(document2));
- }
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- // Index Restoration should be triggered here and document2 should be
- // indexed.
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document1;
-
- // DocumentStore kept the additional document
- EXPECT_THAT(
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- *expected_get_result_proto.mutable_document() = document2;
- EXPECT_THAT(
- icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // We indexed the additional document
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {  // Search must return the same hit after index data is (supposedly) lost and a new engine is initialized.
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Initializes the folder and schema, indexes one document, and checks that it is searchable
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Pretend we lost the entire index. NOTE(review): the path ends in "/idx/lite.", which looks like a file-name prefix rather than a directory -- confirm this call actually deletes anything.
- EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively(
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str()));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Check that the index is ok by running the same search on the newly initialized engine
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {  // Search must return the same hit after garbage is written into idx/lite.hb and a new engine is initialized.
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Initializes the folder and schema, indexes one document, and checks that it is searchable
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Pretend the index is corrupted by writing the 4 bytes "1234" into the idx/lite.hb file
- const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
- ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
- ASSERT_TRUE(fd.is_valid());
- ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Check that the index is ok by running the same search on the newly initialized engine
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {  // Ranking by DOCUMENT_SCORE returns hits in descending score order.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3 (scores 1, 2 and 3)
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in an order (2, 3, 1) that differs
- // from both their score order and its reverse
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending score order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {  // Ranking strategy NONE returns hits in reverse insertion order, ignoring scores.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents whose scores and creation timestamps both satisfy:
- // document1 < document2 < document3
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(1571111111111)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(1572222222222)
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(1573333333333)
- .Build();
-
- // Intentionally inserts the documents in an order (3, 1, 2) that differs
- // from both their score order and its reverse
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
-
- // Results should not be ranked by score but returned in reverse insertion
- // order (inserted 3, 1, 2; expected 2, 1, 3).
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {  // Ranking by CREATION_TIMESTAMP returns hits in descending creation-timestamp order.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of creation
- // timestamp score is: document1 < document2 < document3
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(1571111111111)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(1572222222222)
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(1573333333333)
- .Build();
-
- // Intentionally inserts the documents in an order (3, 1, 2) that differs
- // from both their creation timestamp order and its reverse
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending timestamp order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {  // Ranking by USAGE_TYPE1_COUNT returns hits in descending reported-usage-count order.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents that differ only in uri and body text
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in a different order (3, 1, 2) to
- // rule out the possibility that the following results are merely sorted in
- // the default reverse insertion order.
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 >
- // doc1 when ranked by USAGE_TYPE1_COUNT.
- UsageReport usage_report_doc3 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- UsageReport usage_report_doc2 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
- ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
- ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending USAGE_TYPE1_COUNT order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultShouldHaveDefaultOrderWithoutUsageCounts) {  // With no usage reports, USAGE_TYPE1_COUNT ranking yields reverse insertion order.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents that differ only in uri and body text
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // No usage is reported for any document. Result should be in the default
- // reverse insertion order (inserted 1, 2, 3; expected 3, 2, 1).
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {  // Ranking by USAGE_TYPE1_LAST_USED_TIMESTAMP returns hits in descending last-used-timestamp order.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents that differ only in uri and body text
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in a different order (3, 1, 2) to
- // rule out the possibility that the following results are merely sorted in
- // the default reverse insertion order.
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Report usage for doc2 (at 1000 ms) and doc3 (at 5000 ms). The order will be doc3 > doc2 > doc1 when
- // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP.
- UsageReport usage_report_doc2 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000,
- UsageReport::USAGE_TYPE1);
- UsageReport usage_report_doc3 = CreateUsageReport(
- /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000,
- UsageReport::USAGE_TYPE1);
- ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
- ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespace) {  // RELEVANCE_SCORE ranking of "coffee OR food" over eight documents in one namespace.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index eight documents (uri0 through uri7) in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in both of their text fields.
- // However, doc5 has more matches in its second (longer) field.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 3 times
- "namespace1/uri7", // 'coffee' 2 times
- "namespace1/uri1", // 'food' 2 times
- "namespace1/uri4", // 'food' 1 time, 'foods' 1 time
- "namespace1/uri2", // 'food' 1 time
- "namespace1/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceAdvanced) {  // Ranks "coffee OR food" with the advanced scoring expression "this.relevanceScore() * 2 + 1".
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index eight documents (uri0 through uri7) in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1");
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in both of their text fields.
- // However, doc5 has more matches in its second (longer) field.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 3 times
- "namespace1/uri7", // 'coffee' 2 times
- "namespace1/uri1", // 'food' 2 times
- "namespace1/uri4", // 'food' 1 time, 'foods' 1 time
- "namespace1/uri2", // 'food' 1 time
- "namespace1/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceNotOperator) {  // RELEVANCE_SCORE ranking of "coffee -starbucks": documents containing "starbucks" must not be returned.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index eight documents (uri0 through uri7) in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
- "thin-crust pizza. good and fast. nice coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee -starbucks");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order; uri7 has 'coffee' but also 'starbucks', so it is expected to be absent
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 3 times, 'starbucks' 0 times
- "namespace1/uri3")); // 'coffee' 1 time, 'starbucks' 0 times
-}
-
-TEST_F(IcingSearchEngineTest,
- Bm25fRelevanceScoringOneNamespaceSectionRestrict) {  // RELEVANCE_SCORE ranking when each query term is restricted to one section.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index eight documents (uri0 through uri7) in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document =
- CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18,
- "peets coffee, best coffee",
- "espresso. decaf. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri7", /*score=*/4, "starbucks",
- "habit. birthday rewards. good coffee. brewed coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("subject:coffee OR body:food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
-
- // Result should be in descending score order; uri7 has "coffee" only outside its subject, so it is expected to be absent
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // The term frequencies of "coffee" and "food" are calculated respectively
- // from the subject section and the body section.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(
- GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
- "namespace1/uri1", // 'food' 2 times in section body
- "namespace1/uri4", // 'food' 1 time in section body
- "namespace1/uri2", // 'food' 1 time in section body
- "namespace1/uri6")); // 'food' 1 time in section body
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringTwoNamespaces) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace2".
- document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
- "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
- "starbucks coffee", "good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- ResultSpecProto result_spec_proto;
- result_spec_proto.set_num_per_page(16);
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec_proto);
-
- // Result should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // The two corpora have the same documents except for document 7, which in
- // "namespace2" is much shorter than the average dcoument length, so it is
- // boosted.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
- "namespace1/uri5", // 'coffee' 3 times
- "namespace2/uri5", // 'coffee' 3 times
- "namespace1/uri7", // 'coffee' 2 times
- "namespace1/uri1", // 'food' 2 times
- "namespace2/uri1", // 'food' 2 times
- "namespace1/uri4", // 'food' 2 times
- "namespace2/uri4", // 'food' 2 times
- "namespace1/uri2", // 'food' 1 time
- "namespace2/uri2", // 'food' 1 time
- "namespace1/uri6", // 'food' 1 time
- "namespace2/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringWithNamespaceFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace1".
- DocumentProto document = CreateEmailDocument(
- "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace1", "namespace1/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
- "starbucks coffee",
- "habit. birthday rewards. good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Create and index documents in namespace "namespace2".
- document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
- "sushi belmont",
- "fresh fish. inexpensive. good sushi.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
- "indian food. buffet. spicy food. kadai chicken.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
- "panda express",
- "chinese food. cheap. inexpensive. kung pao.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
- "speederia pizza",
- "thin-crust pizza. good and fast.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
- "whole foods",
- "salads. pizza. organic food. expensive.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
- "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument(
- "namespace2", "namespace2/uri6", /*score=*/4, "costco",
- "bulk. cheap whole beans. frozen fish. food samples.");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
- "starbucks coffee", "good coffee");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("coffee OR food");
- // Now query only corpus 2
- search_spec.add_namespace_filters("namespace2");
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- search_result_proto = icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance());
-
- // Result from namespace "namespace2" should be in descending score order
- EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in name and text sections.
- // Even though doc5 has more matches in the text section, doc7's length is
- // much shorter than the average corpus's length, so it's being boosted.
- // Documents with "food" are ranked lower as the term "food" is commonly
- // present in this corpus, and thus, has a lower IDF.
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc
- "namespace2/uri5", // 'coffee' 3 times
- "namespace2/uri1", // 'food' 2 times
- "namespace2/uri4", // 'food' 2 times
- "namespace2/uri2", // 'food' 1 time
- "namespace2/uri6")); // 'food' 1 time
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 test documents
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // None of the documents have usage reports. Result should be in the default
- // reverse insertion order.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -5589,13 +448,6 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
.AddStringProperty("body", "message2")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
@@ -5638,948 +490,6 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- // Intentionally inserts the documents in the order that is different than
- // their score order
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // "m" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- // Result should be in ascending score order
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
- SearchResultProto search_result_proto = icing.Search(
- search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingDuplicateNamespaceShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify "namespace1" twice. This should result in an error.
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace1");
- result_grouping = result_spec.add_result_groupings();
- entry = result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingDuplicateSchemaShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify "Message" twice. This should result in an error.
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("nonexistentMessage");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingDuplicateNamespaceAndSchemaSchemaShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify "namespace1xMessage" twice. This should result in an error.
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonPositiveMaxResultsShouldReturnError) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- // Specify zero results. This should result in an error.
- ResultSpecProto result_spec;
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(0);
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-
- // Specify negative results. This should result in an error.
- result_spec.mutable_result_groupings(0)->set_max_results(-1);
- EXPECT_THAT(search_result_proto.status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiNamespaceGrouping) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3 < document4 < document5 <
- // document6
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/4")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document5 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/5")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(5)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document6 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/6")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(6)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
-
- // "m" will match all 6 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(2);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace3");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // The last result (document1) in namespace "namespace1" should not be
- // included. "namespace2" and "namespace3" are grouped together. So only the
- // two highest scored documents between the two (both of which are in
- // "namespace3") should be returned.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document6;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document5;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiSchemaGrouping) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetScore(1)
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .SetScore(2)
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace2", "uri3")
- .SetSchema("Message")
- .SetScore(3)
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- // "f" will match all 3 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("f");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("Email");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Each of the highest scored documents of schema type "Message" (document3)
- // and "Email" (document1) should be returned.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document3;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document1;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingMultiNamespaceAndSchemaGrouping) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 3 documents and ensures the relationship in terms of document
- // score is: document1 < document2 < document3 < document4 < document5 <
- // document6
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/4")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document5 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/5")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(5)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document6 =
- DocumentBuilder()
- .SetKey("namespace3", "uri/6")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(6)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
-
- // "m" will match all 6 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
- result_grouping = result_spec.add_result_groupings();
- result_grouping->set_max_results(1);
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("namespace3");
- entry->set_schema("Message");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // The three highest scored documents that fit the criteria of
- // "namespace1xMessage" (document2), "namespace2xMessage" (document4),
- // and "namespace3xMessage" (document6) should be returned.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document6;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonexistentNamespaceShouldBeIgnored) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::NAMESPACE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace1");
- entry = result_grouping->add_entry_groupings();
- entry->set_namespace_("nonexistentNamespace");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Only the top ranked document in "namespace" (document2), should be
- // returned. The presence of "nonexistentNamespace" in the same result
- // grouping should have no effect.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonexistentSchemaShouldBeIgnored) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("nonexistentMessage");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Only the top ranked document in "Message" (document2), should be
- // returned. The presence of "nonexistentMessage" in the same result
- // grouping should have no effect.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document2;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates 2 documents and ensures the relationship in terms of document
- // score is: document1 < document2
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message1")
- .SetScore(1)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace1", "uri/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message2")
- .SetScore(2)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/3")
- .SetSchema("Message")
- .AddStringProperty("body", "message3")
- .SetScore(3)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace2", "uri/4")
- .SetSchema("Message")
- .AddStringProperty("body", "message4")
- .SetScore(4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
-
- // "m" will match all 2 documents
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("m");
-
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
-
- ResultSpecProto result_spec;
- result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE);
- ResultSpecProto::ResultGrouping* result_grouping =
- result_spec.add_result_groupings();
- ResultSpecProto::ResultGrouping::Entry* entry =
- result_grouping->add_entry_groupings();
- result_grouping->set_max_results(1);
- entry->set_namespace_("namespace2");
- entry->set_schema("Message");
- entry = result_grouping->add_entry_groupings();
- entry->set_schema("namespace1");
- entry->set_schema("nonexistentMessage");
-
- SearchResultProto search_result_proto =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- // Only the top ranked document in "namespace2xMessage" (document4), should be
- // returned. The presence of "namespace1xnonexistentMessage" in the same
- // result grouping should have no effect. If either the namespace or the
- // schema type is nonexistent, the entire entry will be ignored.
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- document4;
-
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-}
-
-TEST_F(IcingSearchEngineTest,
- SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
- SchemaProto incompatible_schema = schema;
- incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
-
- // Since we don't have any documents yet, we can't detect this edge-case. But
- // it should be fine since there aren't any documents to be invalidated.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- // Make an incompatible schema, a previously OPTIONAL field is REQUIRED
- SchemaProto incompatible_schema = schema;
- incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
-
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Can retrieve by namespace/uri
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() = document;
-
- ASSERT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Can search for it
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
-
- // Setting the new, different schema will remove incompatible documents
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
-
- // Can't retrieve by namespace/uri
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
- expected_get_result_proto.mutable_status()->set_message(
- "Document (namespace, uri) not found.");
-
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Can't search for it
- SearchResultProto empty_result;
- empty_result.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStatsAndScores(empty_result));
-}
-
TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) {
DocumentProto document = CreateMessageDocument("namespace", "uri");
{
@@ -6868,252 +778,6 @@ TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesInternalError) {
ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
}
-TEST_F(IcingSearchEngineTest, SnippetNormalization) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "mdi Zürich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("mdi Zürich");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(2));
- const DocumentProto& result_document_1 = results.results(0).document();
- const SnippetProto& result_snippet_1 = results.results(0).snippet();
- EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_1, result_snippet_1.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_1.entries(0)),
- ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
- ElementsAre("mdi", "Zürich"));
-
- const DocumentProto& result_document_2 = results.results(1).document();
- const SnippetProto& result_snippet_2 = results.results(1).snippet();
- EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_2,
- result_snippet_2.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_2.entries(0)),
- ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
- ElementsAre("MDI", "zurich"));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "mdi Zürich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("md Zür");
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(2);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(2));
- const DocumentProto& result_document_1 = results.results(0).document();
- const SnippetProto& result_snippet_1 = results.results(0).snippet();
- EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_1, result_snippet_1.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_1.entries(0)),
- ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
- ElementsAre("mdi", "Zürich"));
-
- const DocumentProto& result_document_2 = results.results(1).document();
- const SnippetProto& result_snippet_2 = results.results(1).snippet();
- EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_2,
- result_snippet_2.entries(0).property_name());
- EXPECT_THAT(
- GetWindows(content, result_snippet_2.entries(0)),
- ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
- ElementsAre("MDI", "zurich"));
-}
-
-TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Email")
- .AddStringProperty("subject", "MDI zurich Team Meeting")
- .AddStringProperty("body", "MDI zurich Team Meeting")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Email")
- .AddStringProperty("subject", "MDI zurich trip")
- .AddStringProperty("body", "Let's travel to zurich")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- auto search_spec = std::make_unique<SearchSpecProto>();
- search_spec->set_term_match_type(TermMatchType::PREFIX);
- search_spec->set_query("body:Zür");
-
- auto result_spec = std::make_unique<ResultSpecProto>();
- result_spec->set_num_per_page(1);
- result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec->mutable_snippet_spec()->set_num_matches_per_property(10);
- result_spec->mutable_snippet_spec()->set_num_to_snippet(10);
-
- auto scoring_spec = std::make_unique<ScoringSpecProto>();
- *scoring_spec = GetDefaultScoringSpec();
-
- SearchResultProto results =
- icing.Search(*search_spec, *scoring_spec, *result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(1));
-
- const DocumentProto& result_document_two = results.results(0).document();
- const SnippetProto& result_snippet_two = results.results(0).snippet();
- EXPECT_THAT(result_document_two, EqualsProto(document_two));
- EXPECT_THAT(result_snippet_two.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
- std::string_view content = GetString(
- &result_document_two, result_snippet_two.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
- ElementsAre("Let's travel to zurich"));
- EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
- ElementsAre("zurich"));
-
- search_spec.reset();
- scoring_spec.reset();
- result_spec.reset();
-
- results = icing.GetNextPage(results.next_page_token());
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(1));
-
- const DocumentProto& result_document_one = results.results(0).document();
- const SnippetProto& result_snippet_one = results.results(0).snippet();
- EXPECT_THAT(result_document_one, EqualsProto(document_one));
- EXPECT_THAT(result_snippet_one.entries(), SizeIs(1));
- EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
- content = GetString(&result_document_one,
- result_snippet_one.entries(0).property_name());
- EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
- ElementsAre("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
- ElementsAre("zurich"));
-}
-
-TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
-
- SchemaProto email_schema = CreateMessageSchema();
- EXPECT_THAT(icing.SetSchema(email_schema).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchema().status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-
- DocumentProto doc = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(doc).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing
- .Get(doc.namespace_(), doc.uri(),
- GetResultSpecProto::default_instance())
- .status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
- .status()
- .code(),
- Eq(StatusProto::FAILED_PRECONDITION));
-
- SearchSpecProto search_spec = SearchSpecProto::default_instance();
- ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
- ResultSpecProto result_spec = ResultSpecProto::default_instance();
- EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- constexpr int kSomePageToken = 12;
- EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
-
- EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Optimize().status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-}
-
TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
DocumentProto namespace1 = DocumentBuilder()
.SetKey("namespace1", "uri")
@@ -7213,1551 +877,6 @@ TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
}
}
-TEST_F(IcingSearchEngineTest, Hyphens) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("MyType");
- PropertyConfigProto* prop = type->add_properties();
- prop->set_property_name("foo");
- prop->set_data_type(PropertyConfigProto::DataType::STRING);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("MyType")
- .AddStringProperty("foo", "foo bar-baz bat")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("MyType")
- .AddStringProperty("foo", "bar for baz bat-man")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("foo:bar-baz");
-
- ResultSpecProto result_spec;
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
-
- EXPECT_THAT(results.status(), ProtoIsOk());
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
-}
-
-TEST_F(IcingSearchEngineTest, RestoreIndex) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
-
- // 3. Create the index again. This should trigger index restoration.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // All documents should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(3));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
- EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0"));
- }
-}
-
-TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the last document from the document log
- {
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(
- auto create_result,
- PortableFileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
- std::move(create_result.proto_log);
-
- document = DocumentBuilder(document).SetUri("fake_type/0").Build();
- DocumentWrapper wrapper;
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
-
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
- }
-
- // 3. Create the index again. This should throw out the lite index and trigger
- // index restoration which will only restore the two documents in the main
- // index.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // Only the documents that were in the main index should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1"));
- EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0"));
- }
-}
-
-TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the last two documents from the document log.
- {
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(
- auto create_result,
- PortableFileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- PortableFileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
- std::move(create_result.proto_log);
-
- document = DocumentBuilder(document).SetUri("fake_type/0").Build();
- DocumentWrapper wrapper;
- *wrapper.mutable_document() = document;
- ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
- }
-
- // 3. Create the index again. This should throw out the lite and main index
- // and trigger index restoration.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // Only the first document should be retrievable.
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0"));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- DocumentWithNoIndexedContentDoesntCauseRestoreIndex) {
- // 1. Create an index with a single document in it that has no indexed
- // content.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Set a schema for a single type that has no indexed properties.
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("unindexedField")
- .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- // Add a document that contains no indexed content.
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("unindexedField",
- "Don't you dare search over this!")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Create the index again. This should NOT trigger a recovery of any kind.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- EXPECT_THAT(init_result.status(), ProtoIsOk());
- EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
- // 1. Create an index with a single document in it that has no valid indexed
- // tokens in its content.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Set a schema for a single type that has no indexed properties.
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add a document that contains no valid indexed content - just punctuation.
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "?...!")
- .Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Create the index again. This should NOT trigger a recovery of any kind.
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto init_result = icing.Initialize();
- EXPECT_THAT(init_result.status(), ProtoIsOk());
- EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- }
-}
-
-TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- // 1. Create an index with a LiteIndex that will only allow one document
- // before needing a merge.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- IcingSearchEngine icing(options, GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Add two documents. These should get merged into the main index.
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- document = DocumentBuilder(document).SetUri("fake_type/1").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- // Add one document. This one should get remain in the lite index.
- document = DocumentBuilder(document).SetUri("fake_type/2").Build();
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // 2. Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
-
- // 3. Setup a mock filesystem to fail to grow the main index once.
- bool has_failed_already = false;
- auto open_write_lambda = [this, &has_failed_already](const char* filename) {
- std::string main_lexicon_suffix = "/main-lexicon.prop.2";
- std::string filename_string(filename);
- if (!has_failed_already &&
- filename_string.length() >= main_lexicon_suffix.length() &&
- filename_string.substr(
- filename_string.length() - main_lexicon_suffix.length(),
- main_lexicon_suffix.length()) == main_lexicon_suffix) {
- has_failed_already = true;
- return -1;
- }
- return this->filesystem()->OpenForWrite(filename);
- };
- auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
- ON_CALL(*mock_icing_filesystem, OpenForWrite)
- .WillByDefault(open_write_lambda);
-
- // 4. Create the index again. This should trigger index restoration.
- {
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- options.set_index_merge_size(document.ByteSizeLong());
- TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
- std::move(mock_icing_filesystem),
- std::make_unique<FakeClock>(),
- GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(),
- ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
-
- SearchSpecProto search_spec;
- search_spec.set_query("consectetur");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.next_page_token(), Eq(0));
- // Only the last document that was added should still be retrievable.
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
- }
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogFunctionLatency) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10));
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("icing", "fake_type/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "fake_type/2")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
- Eq(0));
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
- Eq(1));
-
- // Put another document.
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
- Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
- // Even though the fake timer will return 10, all the latency numbers related
- // to recovery / restoration should be 0 during the first-time initialization.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- {
- // Append a non-checksummed document. This will mess up the checksum of the
- // proto log, forcing it to rewind and later return a DATA_LOSS error.
- const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
-
- int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
- filesystem()->PWrite(document_log_file.c_str(), file_size,
- serialized_document.data(),
- serialized_document.size());
- }
-
- {
- // Document store will rewind to previous checkpoint. The cause should be
- // DATA_LOSS and the data status should be PARTIAL_LOSS.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::PARTIAL_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseCompleteDataLoss) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("icing", "fake_type/1")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- const std::string document_log_file = absl_ports::StrCat(
- GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
- int64_t corruptible_offset;
-
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
-
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // There's some space at the beginning of the file (e.g. header, kmagic,
- // etc) that is necessary to initialize the FileBackedProtoLog. We can't
- // corrupt that region, so we need to figure out the offset at which
- // documents will be written to - which is the file size after
- // initialization.
- corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
-
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- }
-
- {
- // "Corrupt" the content written in the log. Make the corrupt document
- // smaller than our original one so we don't accidentally write past our
- // file.
- DocumentProto document =
- DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
- std::string serialized_document = document.SerializeAsString();
- ASSERT_TRUE(filesystem()->PWrite(
- document_log_file.c_str(), corruptible_offset,
- serialized_document.data(), serialized_document.size()));
-
- PortableFileBackedProtoLog<DocumentWrapper>::Header header =
- ReadDocumentLogHeader(*filesystem(), document_log_file);
-
- // Set dirty bit to true to reflect that something changed in the log.
- header.SetDirtyFlag(true);
- header.SetHeaderChecksum(header.CalculateHeaderChecksum());
-
- WriteDocumentLogHeader(*filesystem(), document_log_file, header);
- }
-
- {
- // Document store will completely rewind. The cause should be DATA_LOSS and
- // the data status should be COMPLETE_LOSS.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::COMPLETE_LOSS));
- // The complete rewind of ground truth causes us to clear the index, but
- // that's not considered a restoration.
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- {
- // Initialize and put a document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- {
- // Delete the index file to trigger RestoreIndexIfNeeded.
- std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
- }
-
- {
- // Index is empty but ground truth is not. Index should be restored due to
- // the inconsistency.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- IcingSearchEngineOptions options = GetDefaultIcingOptions();
- {
- // Initialize and put one document.
- IcingSearchEngine icing(options, GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- {
- // Simulate a schema change where power is lost after the schema is written.
- SchemaProto new_schema =
- SchemaBuilder()
- .AddType(
- SchemaTypeConfigBuilder()
- .SetType("Message")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- // Write the marker file
- std::string marker_filepath =
- absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
- ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
- ASSERT_TRUE(sfd.is_valid());
-
- // Write the new schema
- FakeClock fake_clock;
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
- }
-
- {
- // Both document store and index should be recovered from checksum mismatch.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-
- {
- // No recovery should be needed.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- {
- // Initialize and put one document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // lambda to fail OpenForWrite on lite index hit buffer once.
- bool has_failed_already = false;
- auto open_write_lambda = [this, &has_failed_already](const char* filename) {
- std::string lite_index_buffer_file_path =
- absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
- std::string filename_string(filename);
- if (!has_failed_already && filename_string == lite_index_buffer_file_path) {
- has_failed_already = true;
- return -1;
- }
- return this->filesystem()->OpenForWrite(filename);
- };
-
- auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
- // This fails Index::Create() once.
- ON_CALL(*mock_icing_filesystem, OpenForWrite)
- .WillByDefault(open_write_lambda);
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::move(mock_icing_filesystem),
- std::move(fake_clock), GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseDocStoreIOError) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- {
- // Initialize and put one document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
-
- // lambda to fail Read on document store header once.
- bool has_failed_already = false;
- auto read_lambda = [this, &has_failed_already](const char* filename,
- void* buf, size_t buf_size) {
- std::string document_store_header_file_path =
- absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
- std::string filename_string(filename);
- if (!has_failed_already &&
- filename_string == document_store_header_file_path) {
- has_failed_already = true;
- return false;
- }
- return this->filesystem()->Read(filename, buf, buf_size);
- };
-
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- // This fails DocumentStore::InitializeDerivedFiles() once.
- ON_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
- .WillByDefault(read_lambda);
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::move(mock_filesystem),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
-
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(0));
-}
-
-TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- }
-
- {
- // Delete the schema store header file to trigger an I/O error.
- std::string schema_store_header_file_path =
- GetSchemaDir() + "/schema_store_header";
- filesystem()->DeleteFile(schema_store_header_file_path.c_str());
- }
-
- {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .schema_store_recovery_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .document_store_recovery_latency_ms(),
- Eq(0));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().document_store_data_status(),
- Eq(InitializeStatsProto::NO_DATA_LOSS));
- EXPECT_THAT(
- initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
- EXPECT_THAT(initialize_result_proto.initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- }
-}
-
-TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) {
- {
- // Initialize an empty storage.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- // There should be 0 schema types.
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
- Eq(0));
-
- // Set a schema with one type config.
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- // There should be 1 schema type.
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
- Eq(1));
-
- // Create and set a schema with two type configs: Email and Message.
- SchemaProto schema = CreateEmailSchema();
-
- auto type = schema.add_types();
- type->set_schema_type("Message");
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- }
-
- {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
- Eq(2));
- }
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogFunctionLatency) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .AddStringProperty("body", "message body")
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
- Eq(10));
- size_t document_size = put_result_proto.put_document_stats().document_size();
- EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
- EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
- sizeof(DocumentProto::InternalFields)));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) {
- DocumentProto document = DocumentBuilder()
- .SetKey("icing", "fake_type/0")
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
- // No merge should happen.
- EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
- Eq(0));
- // The input document has 2 tokens.
- EXPECT_THAT(put_result_proto.put_document_stats()
- .tokenization_stats()
- .num_tokens_indexed(),
- Eq(2));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("icing", "fake_type/1")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "fake_type/2")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
-
- // Create an icing instance with index_merge_size = document1's size.
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
- icing_options.set_index_merge_size(document1.ByteSizeLong());
-
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(10);
- TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Putting document2 should trigger an index merge.
- PutResultProto put_result_proto = icing.Put(document2);
- EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
- Eq(10));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // 1. Add two email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("subject", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("subject", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // 2. Issue a query that will match those documents and use an empty field
- // mask to request NO properties.
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("hello");
-
- ResultSpecProto result_spec;
- // Retrieve only one result at a time to make sure that projection works when
- // retrieving all pages.
- result_spec.set_num_per_page(1);
- TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
- email_field_mask->set_schema_type("Email");
- email_field_mask->add_paths("");
-
- SearchResultProto results =
- icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
-
- // 3. Verify that the returned results contain no properties.
- DocumentProto projected_document_two = DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_two));
-
- results = icing.GetNextPage(results.next_page_token());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
- DocumentProto projected_document_one = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_one));
-}
-
-TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // 1. Add two email documents
- DocumentProto document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .AddStringProperty("emailAddress", "shopgirl@aol.com")
- .Build())
- .AddStringProperty("subject", "Hello World!")
- .AddStringProperty(
- "body", "Oh what a beautiful morning! Oh what a beautiful day!")
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty(
- "sender", DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .AddStringProperty("emailAddress", "ny152@aol.com")
- .Build())
- .AddStringProperty("subject", "Goodnight Moon!")
- .AddStringProperty("body",
- "Count all the sheep and tell them 'Hello'.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // 2. Issue a query that will match those documents and request only
- // 'sender.name' and 'subject' properties.
- // Create all of search_spec, result_spec and scoring_spec as objects with
- // scope that will end before the call to GetNextPage to ensure that the
- // implementation isn't relying on references to any of them.
- auto search_spec = std::make_unique<SearchSpecProto>();
- search_spec->set_term_match_type(TermMatchType::PREFIX);
- search_spec->set_query("hello");
-
- auto result_spec = std::make_unique<ResultSpecProto>();
- // Retrieve only one result at a time to make sure that projection works when
- // retrieving all pages.
- result_spec->set_num_per_page(1);
- TypePropertyMask* email_field_mask = result_spec->add_type_property_masks();
- email_field_mask->set_schema_type("Email");
- email_field_mask->add_paths("sender.name");
- email_field_mask->add_paths("subject");
-
- auto scoring_spec = std::make_unique<ScoringSpecProto>();
- *scoring_spec = GetDefaultScoringSpec();
- SearchResultProto results =
- icing.Search(*search_spec, *scoring_spec, *result_spec);
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
-
- // 3. Verify that the first returned result only contains the 'sender.name'
- // property.
- DocumentProto projected_document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Person")
- .AddStringProperty("name", "Tom Hanks")
- .Build())
- .AddStringProperty("subject", "Goodnight Moon!")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_two));
-
- // 4. Now, delete all of the specs used in the search. GetNextPage should have
- // no problem because it shouldn't be keeping any references to them.
- search_spec.reset();
- result_spec.reset();
- scoring_spec.reset();
-
- // 5. Verify that the second returned result only contains the 'sender.name'
- // property.
- results = icing.GetNextPage(results.next_page_token());
- EXPECT_THAT(results.status(), ProtoIsOk());
- EXPECT_THAT(results.results(), SizeIs(1));
- DocumentProto projected_document_one =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetCreationTimestampMs(1000)
- .SetSchema("Email")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Person")
- .AddStringProperty("name", "Meg Ryan")
- .Build())
- .AddStringProperty("subject", "Hello World!")
- .Build();
- EXPECT_THAT(results.results(0).document(),
- EqualsProto(projected_document_one));
-}
-
-TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(5);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Creates and inserts 5 documents
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
- DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.add_namespace_filters("namespace");
- search_spec.add_schema_type_filters(document1.schema());
- search_spec.set_query("message");
-
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(2);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
- result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
-
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
-
- // Searches and gets the first page, 2 results with 2 snippets
- SearchResultProto search_result =
- icing.Search(search_spec, scoring_spec, result_spec);
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
-
- // Check the stats
- QueryStatsProto exp_stats;
- exp_stats.set_query_length(7);
- exp_stats.set_num_terms(1);
- exp_stats.set_num_namespaces_filtered(1);
- exp_stats.set_num_schema_types_filtered(1);
- exp_stats.set_ranking_strategy(
- ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- exp_stats.set_is_first_page(true);
- exp_stats.set_requested_page_size(2);
- exp_stats.set_num_results_returned_current_page(2);
- exp_stats.set_num_documents_scored(5);
- exp_stats.set_num_results_with_snippets(2);
- exp_stats.set_latency_ms(5);
- exp_stats.set_parse_query_latency_ms(5);
- exp_stats.set_scoring_latency_ms(5);
- exp_stats.set_ranking_latency_ms(5);
- exp_stats.set_document_retrieval_latency_ms(5);
- exp_stats.set_lock_acquisition_latency_ms(5);
- EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
-
- // Second page, 2 result with 1 snippet
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(2));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
-
- exp_stats = QueryStatsProto();
- exp_stats.set_is_first_page(false);
- exp_stats.set_requested_page_size(2);
- exp_stats.set_num_results_returned_current_page(2);
- exp_stats.set_num_results_with_snippets(1);
- exp_stats.set_latency_ms(5);
- exp_stats.set_document_retrieval_latency_ms(5);
- exp_stats.set_lock_acquisition_latency_ms(5);
- EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
-
- // Third page, 1 result with 0 snippets
- search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status(), ProtoIsOk());
- ASSERT_THAT(search_result.results(), SizeIs(1));
- ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
-
- exp_stats = QueryStatsProto();
- exp_stats.set_is_first_page(false);
- exp_stats.set_requested_page_size(2);
- exp_stats.set_num_results_returned_current_page(1);
- exp_stats.set_num_results_with_snippets(0);
- exp_stats.set_latency_ms(5);
- exp_stats.set_document_retrieval_latency_ms(5);
- exp_stats.set_lock_acquisition_latency_ms(5);
- EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
-}
-
-TEST_F(IcingSearchEngineTest, OptimizeStatsProtoTest) {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(5);
- fake_clock->SetSystemTimeMilliseconds(10000);
- auto icing = std::make_unique<TestIcingSearchEngine>(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::move(fake_clock),
- GetTestJniCache());
- ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Create three documents.
- DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
- DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- document2.set_creation_timestamp_ms(9000);
- document2.set_ttl_ms(500);
- DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
- ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
-
- // Delete the first document.
- ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
- ProtoIsOk());
- ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
-
- OptimizeStatsProto expected;
- expected.set_latency_ms(5);
- expected.set_document_store_optimize_latency_ms(5);
- expected.set_index_restoration_latency_ms(5);
- expected.set_num_original_documents(3);
- expected.set_num_deleted_documents(1);
- expected.set_num_expired_documents(1);
- expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
-
- // Run Optimize
- OptimizeResultProto result = icing->Optimize();
- // Depending on how many blocks the documents end up spread across, it's
- // possible that Optimize can remove documents without shrinking storage. The
- // first Optimize call will also write the OptimizeStatusProto for the first
- // time which will take up 1 block. So make sure that before_size is no less
- // than after_size - 1 block.
- uint32_t page_size = getpagesize();
- EXPECT_THAT(result.optimize_stats().storage_size_before(),
- Ge(result.optimize_stats().storage_size_after() - page_size));
- result.mutable_optimize_stats()->clear_storage_size_before();
- result.mutable_optimize_stats()->clear_storage_size_after();
- EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
-
- fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetTimerElapsedMilliseconds(5);
- fake_clock->SetSystemTimeMilliseconds(20000);
- icing = std::make_unique<TestIcingSearchEngine>(
- GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(), std::move(fake_clock),
- GetTestJniCache());
- ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
-
- expected = OptimizeStatsProto();
- expected.set_latency_ms(5);
- expected.set_document_store_optimize_latency_ms(5);
- expected.set_index_restoration_latency_ms(5);
- expected.set_num_original_documents(1);
- expected.set_num_deleted_documents(0);
- expected.set_num_expired_documents(0);
- expected.set_time_since_last_optimize_ms(10000);
- expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION);
-
- // Run Optimize
- result = icing->Optimize();
- EXPECT_THAT(result.optimize_stats().storage_size_before(),
- Eq(result.optimize_stats().storage_size_after()));
- result.mutable_optimize_stats()->clear_storage_size_before();
- result.mutable_optimize_stats()->clear_storage_size_after();
- EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
-
- // Delete the last document.
- ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(),
- ProtoIsOk());
-
- expected = OptimizeStatsProto();
- expected.set_latency_ms(5);
- expected.set_document_store_optimize_latency_ms(5);
- expected.set_index_restoration_latency_ms(5);
- expected.set_num_original_documents(1);
- expected.set_num_deleted_documents(1);
- expected.set_num_expired_documents(0);
- expected.set_time_since_last_optimize_ms(0);
- // Should rebuild the index since all documents are removed.
- expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD);
-
- // Run Optimize
- result = icing->Optimize();
- EXPECT_THAT(result.optimize_stats().storage_size_before(),
- Ge(result.optimize_stats().storage_size_after()));
- result.mutable_optimize_stats()->clear_storage_size_before();
- result.mutable_optimize_stats()->clear_storage_size_after();
- EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
-}
-
TEST_F(IcingSearchEngineTest, StorageInfoTest) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -8778,1481 +897,6 @@ TEST_F(IcingSearchEngineTest, StorageInfoTest) {
EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0));
}
-TEST_F(IcingSearchEngineTest, SnippetErrorTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty(
- PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REPEATED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetScore(10)
- .SetSchema("Generic")
- .AddStringProperty("subject", "I like cats", "I like dogs",
- "I like birds", "I like fish")
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetScore(20)
- .SetSchema("Generic")
- .AddStringProperty("subject", "I like red", "I like green",
- "I like blue", "I like yellow")
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri3")
- .SetScore(5)
- .SetSchema("Generic")
- .AddStringProperty("subject", "I like cupcakes", "I like donuts",
- "I like eclairs", "I like froyo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.add_schema_type_filters("Generic");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("like");
- ScoringSpecProto scoring_spec;
- scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(3);
- result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4);
- SearchResultProto search_results =
- icing.Search(search_spec, scoring_spec, result_spec);
-
- ASSERT_THAT(search_results.results(), SizeIs(3));
- const SearchResultProto::ResultProto* result = &search_results.results(0);
- EXPECT_THAT(result->document().uri(), Eq("uri2"));
- ASSERT_THAT(result->snippet().entries(), SizeIs(3));
- const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), "subject[0]");
- std::string_view content = GetString(&result->document(), "subject[0]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(1);
- EXPECT_THAT(entry->property_name(), "subject[1]");
- content = GetString(&result->document(), "subject[1]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(2);
- EXPECT_THAT(entry->property_name(), "subject[2]");
- content = GetString(&result->document(), "subject[2]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- result = &search_results.results(1);
- EXPECT_THAT(result->document().uri(), Eq("uri1"));
- ASSERT_THAT(result->snippet().entries(), SizeIs(3));
- entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), "subject[0]");
- content = GetString(&result->document(), "subject[0]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(1);
- ASSERT_THAT(entry->property_name(), "subject[1]");
- content = GetString(&result->document(), "subject[1]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- entry = &result->snippet().entries(2);
- ASSERT_THAT(entry->property_name(), "subject[2]");
- content = GetString(&result->document(), "subject[2]");
- EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
-
- result = &search_results.results(2);
- ASSERT_THAT(result->document().uri(), Eq("uri3"));
- ASSERT_THAT(result->snippet().entries(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest, CJKSnippetTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // UTF8 idx: 0 3 9 15 18
- // UTF16 idx: 0 1 3 5 6
- // Breaks into segments: "我", "每天", "走路", "去", "上班"
- constexpr std::string_view kChinese = "我每天走路去上班。";
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", kChinese)
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
-
- // Search and request snippet matching but no windowing.
- SearchSpecProto search_spec;
- search_spec.set_query("走");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_num_to_snippet(
- std::numeric_limits<int>::max());
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(
- std::numeric_limits<int>::max());
-
- // Search and make sure that we got a single successful result
- SearchResultProto search_results = icing.Search(
- search_spec, ScoringSpecProto::default_instance(), result_spec);
- ASSERT_THAT(search_results.status(), ProtoIsOk());
- ASSERT_THAT(search_results.results(), SizeIs(1));
- const SearchResultProto::ResultProto* result = &search_results.results(0);
- EXPECT_THAT(result->document().uri(), Eq("uri1"));
-
- // Ensure that one and only one property was matched and it was "body"
- ASSERT_THAT(result->snippet().entries(), SizeIs(1));
- const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), Eq("body"));
-
- // Get the content for "subject" and see what the match is.
- std::string_view content = GetString(&result->document(), "body");
- ASSERT_THAT(content, Eq(kChinese));
-
- // Ensure that there is one and only one match within "subject"
- ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
- const SnippetMatchProto& match_proto = entry->snippet_matches(0);
-
- EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9));
- EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6));
- std::string_view match =
- content.substr(match_proto.exact_match_byte_position(),
- match_proto.exact_match_byte_length());
- ASSERT_THAT(match, Eq("走路"));
-
- // Ensure that the utf-16 values are also as expected
- EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
- EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
-}
-
-TEST_F(IcingSearchEngineTest, InvalidToEmptyQueryTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // String: "Luca Brasi sleeps with the 🐟🐟🐟."
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // UTF8 idx: 0 5 11 18 23 27 3135 39
- // UTF16 idx: 0 5 11 18 23 27 2931 33
- // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
- // and "🐟".
- constexpr std::string_view kSicilianMessage =
- "Luca Brasi sleeps with the 🐟🐟🐟.";
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", kSicilianMessage)
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "Some other content.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // Search and request snippet matching but no windowing.
- SearchSpecProto search_spec;
- search_spec.set_query("?");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
- ScoringSpecProto scoring_spec;
- ResultSpecProto result_spec;
-
- // Search and make sure that we got a single successful result
- SearchResultProto search_results =
- icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query("。");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query("-");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query(":");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query("OR");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-
- search_spec.set_query(" ");
- search_results = icing.Search(search_spec, scoring_spec, result_spec);
- EXPECT_THAT(search_results.status(), ProtoIsOk());
- EXPECT_THAT(search_results.results(), SizeIs(2));
-}
-
-TEST_F(IcingSearchEngineTest, EmojiSnippetTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // String: "Luca Brasi sleeps with the 🐟🐟🐟."
- // ^ ^ ^ ^ ^ ^ ^ ^ ^
- // UTF8 idx: 0 5 11 18 23 27 3135 39
- // UTF16 idx: 0 5 11 18 23 27 2931 33
- // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟"
- // and "🐟".
- constexpr std::string_view kSicilianMessage =
- "Luca Brasi sleeps with the 🐟🐟🐟.";
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", kSicilianMessage)
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- DocumentProto document_two =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Message")
- .AddStringProperty("body", "Some other content.")
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- // Search and request snippet matching but no windowing.
- SearchSpecProto search_spec;
- search_spec.set_query("🐟");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
-
- ResultSpecProto result_spec;
- result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
- result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
-
- // Search and make sure that we got a single successful result
- SearchResultProto search_results = icing.Search(
- search_spec, ScoringSpecProto::default_instance(), result_spec);
- ASSERT_THAT(search_results.status(), ProtoIsOk());
- ASSERT_THAT(search_results.results(), SizeIs(1));
- const SearchResultProto::ResultProto* result = &search_results.results(0);
- EXPECT_THAT(result->document().uri(), Eq("uri1"));
-
- // Ensure that one and only one property was matched and it was "body"
- ASSERT_THAT(result->snippet().entries(), SizeIs(1));
- const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
- EXPECT_THAT(entry->property_name(), Eq("body"));
-
- // Get the content for "subject" and see what the match is.
- std::string_view content = GetString(&result->document(), "body");
- ASSERT_THAT(content, Eq(kSicilianMessage));
-
- // Ensure that there is one and only one match within "subject"
- ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
- const SnippetMatchProto& match_proto = entry->snippet_matches(0);
-
- EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27));
- EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4));
- std::string_view match =
- content.substr(match_proto.exact_match_byte_position(),
- match_proto.exact_match_byte_length());
- ASSERT_THAT(match, Eq("🐟"));
-
- // Ensure that the utf-16 values are also as expected
- EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27));
- EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
-}
-
-TEST_F(IcingSearchEngineTest, PutDocumentIndexFailureDeletion) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // Testing has shown that adding ~600,000 terms generated this way will
- // fill up the hit buffer.
- std::vector<std::string> terms = GenerateUniqueTerms(600000);
- std::string content = absl_ports::StrJoin(terms, " ");
- DocumentProto document = DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Message")
- .AddStringProperty("body", "foo " + content)
- .Build();
- // We failed to add the document to the index fully. This means that we should
- // reject the document from Icing entirely.
- ASSERT_THAT(icing.Put(document).status(),
- ProtoStatusIs(StatusProto::OUT_OF_SPACE));
-
- // Make sure that the document isn't searchable.
- SearchSpecProto search_spec;
- search_spec.set_query("foo");
- search_spec.set_term_match_type(TERM_MATCH_PREFIX);
-
- SearchResultProto search_results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(search_results.status(), ProtoIsOk());
- ASSERT_THAT(search_results.results(), IsEmpty());
-
- // Make sure that the document isn't retrievable.
- GetResultProto get_result =
- icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
- ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // Creates and inserts 6 documents, and index 6 termSix, 5 termFive, 4
- // termFour, 3 termThree, 2 termTwo and one termOne.
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty(
- "subject", "termOne termTwo termThree termFour termFive termSix")
- .Build();
- DocumentProto document2 =
- DocumentBuilder()
- .SetKey("namespace", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject",
- "termTwo termThree termFour termFive termSix")
- .Build();
- DocumentProto document3 =
- DocumentBuilder()
- .SetKey("namespace", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termThree termFour termFive termSix")
- .Build();
- DocumentProto document4 =
- DocumentBuilder()
- .SetKey("namespace", "uri4")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termFour termFive termSix")
- .Build();
- DocumentProto document5 =
- DocumentBuilder()
- .SetKey("namespace", "uri5")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termFive termSix")
- .Build();
- DocumentProto document6 = DocumentBuilder()
- .SetKey("namespace", "uri6")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termSix")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("t");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Query all suggestions, and they will be ranked.
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions().at(0).query(), "termsix");
- ASSERT_THAT(response.suggestions().at(1).query(), "termfive");
- ASSERT_THAT(response.suggestions().at(2).query(), "termfour");
- ASSERT_THAT(response.suggestions().at(3).query(), "termthree");
- ASSERT_THAT(response.suggestions().at(4).query(), "termtwo");
- ASSERT_THAT(response.suggestions().at(5).query(), "termone");
-
- // Query first three suggestions, and they will be ranked.
- suggestion_spec.set_num_to_return(3);
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions().at(0).query(), "termsix");
- ASSERT_THAT(response.suggestions().at(1).query(), "termfive");
- ASSERT_THAT(response.suggestions().at(2).query(), "termfour");
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInOneNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace1 has 2 results.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInMultipleNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace3", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace2 and namespace3 has 2 results.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace2");
- suggestion_spec.add_namespace_filters("namespace3");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NamespaceNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Search for non-exist namespace3
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace3");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::OK));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- // Index 4 documents,
- // namespace1 has 2 hit2 for term one
- // namespace2 has 2 hit2 for term two and 1 hit for term one.
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termone")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termone")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termone termtwo")
- .Build();
- DocumentProto document4 = DocumentBuilder()
- .SetKey("namespace2", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "termtwo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionTermOne;
- suggestionTermOne.set_query("termone");
- SuggestionResponse::Suggestion suggestionTermTwo;
- suggestionTermTwo.set_query("termtwo");
-
- // only search suggestion for namespace2. The correctly order should be
- // {"termtwo", "termone"}. If we're not filtering out namespace1 when
- // calculating our score, then it will be {"termone", "termtwo"}.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("t");
- suggestion_spec.add_namespace_filters("namespace2");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- ElementsAre(EqualsProto(suggestionTermTwo),
- EqualsProto(suggestionTermOne)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DeletionTest) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace1 has this suggestion
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // namespace2 has this suggestion
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // delete document from namespace 1
- EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk());
-
- // Now namespace1 will return empty
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace1");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-
- // namespace2 still has this suggestion, so we can prove the reason of
- // namespace 1 cannot find it is we filter it out, not it doesn't exist.
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ShouldReturnInOneDocument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- // Only search in namespace1,uri1
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
- namespace1_uri1->add_document_uris("uri1");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // Only search in namespace1,uri2
- suggestion_spec.clear_document_uri_filters();
- NamespaceDocumentUriGroup* namespace1_uri2 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri2->set_namespace_("namespace1");
- namespace1_uri2->add_document_uris("uri2");
-
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInMultipleDocument) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace1", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- // Only search document in namespace1,uri1 and namespace2,uri2
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1_uri2 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1_uri2->set_namespace_("namespace1");
- namespace1_uri1_uri2->add_document_uris("uri1");
- namespace1_uri1_uri2->add_document_uris("uri2");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool),
- EqualsProto(suggestionFoo)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- DocumentProto document3 = DocumentBuilder()
- .SetKey("namespace3", "uri3")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- // Only search document in namespace1,uri1 and all documents under namespace2
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.add_namespace_filters("namespace2");
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
- namespace1_uri1->add_document_uris("uri1");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool),
- EqualsProto(suggestionFoo)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DocumentIdDoesntExist) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Search for a non-exist document id : namespace3,uri3
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_namespace_filters("namespace3");
- NamespaceDocumentUriGroup* namespace3_uri3 =
- suggestion_spec.add_document_uri_filters();
- namespace3_uri3->set_namespace_("namespace3");
- namespace3_uri3->add_document_uris("uri3");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "foo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- // Search for the document namespace1,uri1 with namespace filter in
- // namespace2.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
- namespace1_uri1->add_document_uris("uri1");
- suggestion_spec.add_namespace_filters("namespace2");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_EmptyDocumentIdInNamespace) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Give empty document uris in namespace 1
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- NamespaceDocumentUriGroup* namespace1_uri1 =
- suggestion_spec.add_document_uri_filters();
- namespace1_uri1->set_namespace_("namespace1");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Email");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_SchemaTypeNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Email");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_ShouldReturnInDesiredProperty) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender",
- DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .AddStringProperty("emailAddress", "fo")
- .Build())
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Only search in subject.
- TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
- mask->set_schema_type("Email");
- mask->add_paths("subject");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // Search in subject and sender.name
- suggestion_spec.clear_type_property_filters();
- mask = suggestion_spec.add_type_property_filters();
- mask->set_schema_type("Email");
- mask->add_paths("subject");
- mask->add_paths("sender.name");
-
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFool)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_NestedPropertyReturnNothing) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Only search in Person.name.
- suggestion_spec.add_schema_type_filters("Person");
- TypePropertyMask* mask = suggestion_spec.add_type_property_filters();
- mask->set_schema_type("Person");
- mask->add_paths("name");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_PropertyFilterAndSchemaFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(10)
- .AddStringProperty("subject", "fool")
- .AddDocumentProperty("sender", DocumentBuilder()
- .SetKey("namespace", "uri1-sender")
- .SetSchema("Person")
- .AddStringProperty("name", "foo")
- .Build())
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace1", "uri2")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFoo;
- suggestionFoo.set_query("foo");
- SuggestionResponse::Suggestion suggestionFo;
- suggestionFo.set_query("fo");
-
- // Search in sender.name of Email and everything in Message.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Email");
- suggestion_spec.add_schema_type_filters("Message");
- TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
- mask1->set_schema_type("Email");
- mask1->add_paths("sender.name");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFoo),
- EqualsProto(suggestionFo)));
-}
-
-TEST_F(IcingSearchEngineTest,
- SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
- PropertyConfigBuilder()
- .SetName("name")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person",
- /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty("body", "fo")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Search in sender.name of Email but schema type is Message.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
- suggestion_spec.add_schema_type_filters("Message");
- TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters();
- mask1->set_schema_type("Email");
- mask1->add_paths("sender.name");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_OrderByTermFrequency) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
- PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_REQUIRED)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document1 =
- DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Message")
- .SetCreationTimestampMs(10)
- .AddStringProperty(
- "body", "termthree termthree termthree termtwo termtwo termone")
- .Build();
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
-
- // Search in sender.name of Email but schema type is Message.
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("t");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::EXACT_ONLY);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY);
-
- SuggestionResponse::Suggestion suggestionTermOne;
- suggestionTermOne.set_query("termone");
- SuggestionResponse::Suggestion suggestionTermTwo;
- suggestionTermTwo.set_query("termtwo");
- SuggestionResponse::Suggestion suggestionTermThree;
- suggestionTermThree.set_query("termthree");
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- ElementsAre(EqualsProto(suggestionTermThree),
- EqualsProto(suggestionTermTwo),
- EqualsProto(suggestionTermOne)));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ExpiredTest) {
- DocumentProto document1 = DocumentBuilder()
- .SetKey("namespace1", "uri1")
- .SetSchema("Email")
- .SetCreationTimestampMs(100)
- .SetTtlMs(500)
- .AddStringProperty("subject", "fool")
- .Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("namespace2", "uri2")
- .SetSchema("Email")
- .SetCreationTimestampMs(100)
- .SetTtlMs(1000)
- .AddStringProperty("subject", "fool")
- .Build();
- {
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(400);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
- ProtoIsOk());
-
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
-
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- // namespace1 has this suggestion
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
-
- // namespace2 has this suggestion
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
- }
- // We reinitialize here so we can feed in a fake clock this time
- {
- // Time needs to be past document1 creation time (100) + ttl (500) for it
- // to count as "expired". document2 is not expired since its ttl is 1000.
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(800);
-
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("f");
- suggestion_spec.add_namespace_filters("namespace1");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- // Now namespace1 will return empty
- suggestion_spec.clear_namespace_filters();
- suggestion_spec.add_namespace_filters("namespace1");
- SuggestionResponse response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(), IsEmpty());
-
- // namespace2 still has this suggestion
- SuggestionResponse::Suggestion suggestionFool;
- suggestionFool.set_query("fool");
-
- suggestion_spec.add_namespace_filters("namespace2");
- response = icing.SearchSuggestions(suggestion_spec);
- ASSERT_THAT(response.status(), ProtoIsOk());
- ASSERT_THAT(response.suggestions(),
- UnorderedElementsAre(EqualsProto(suggestionFool)));
- }
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_emptyPrefix) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("");
- suggestion_spec.set_num_to_return(10);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
-TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- SuggestionSpecProto suggestion_spec;
- suggestion_spec.set_prefix("prefix");
- suggestion_spec.set_num_to_return(0);
- suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
- TermMatchType::PREFIX);
- suggestion_spec.mutable_scoring_spec()->set_rank_by(
- SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT);
-
- ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(),
- ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityBasicSucceeds) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -10328,539 +972,6 @@ TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) {
ASSERT_THAT(result.status(), ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, IcingShouldWorkFor64Sections) {
- // Create a schema with 64 sections
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- // Person has 4 sections.
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("firstName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("lastName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("phoneNumber")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- // Email has 16 sections.
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("body")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("date")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("time")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("sender")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("receiver")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("cc")
- .SetDataTypeDocument(
- "Person", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_REPEATED)))
- .AddType(SchemaTypeConfigBuilder()
- // EmailCollection has 64 sections.
- .SetType("EmailCollection")
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email1")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email2")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email3")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("email4")
- .SetDataTypeDocument(
- "Email", /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- DocumentProto person1 =
- DocumentBuilder()
- .SetKey("namespace", "person1")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first1")
- .AddStringProperty("lastName", "last1")
- .AddStringProperty("emailAddress", "email1@gmail.com")
- .AddStringProperty("phoneNumber", "000-000-001")
- .Build();
- DocumentProto person2 =
- DocumentBuilder()
- .SetKey("namespace", "person2")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first2")
- .AddStringProperty("lastName", "last2")
- .AddStringProperty("emailAddress", "email2@gmail.com")
- .AddStringProperty("phoneNumber", "000-000-002")
- .Build();
- DocumentProto person3 =
- DocumentBuilder()
- .SetKey("namespace", "person3")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first3")
- .AddStringProperty("lastName", "last3")
- .AddStringProperty("emailAddress", "email3@gmail.com")
- .AddStringProperty("phoneNumber", "000-000-003")
- .Build();
- DocumentProto email1 = DocumentBuilder()
- .SetKey("namespace", "email1")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-01")
- .AddStringProperty("time", "1:00 PM")
- .AddDocumentProperty("sender", person1)
- .AddDocumentProperty("receiver", person2)
- .AddDocumentProperty("cc", person3)
- .Build();
- DocumentProto email2 = DocumentBuilder()
- .SetKey("namespace", "email2")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-02")
- .AddStringProperty("time", "2:00 PM")
- .AddDocumentProperty("sender", person2)
- .AddDocumentProperty("receiver", person1)
- .AddDocumentProperty("cc", person3)
- .Build();
- DocumentProto email3 = DocumentBuilder()
- .SetKey("namespace", "email3")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-03")
- .AddStringProperty("time", "3:00 PM")
- .AddDocumentProperty("sender", person3)
- .AddDocumentProperty("receiver", person1)
- .AddDocumentProperty("cc", person2)
- .Build();
- DocumentProto email4 = DocumentBuilder()
- .SetKey("namespace", "email4")
- .SetSchema("Email")
- .AddStringProperty("body", "test body")
- .AddStringProperty("subject", "test subject")
- .AddStringProperty("date", "2022-08-04")
- .AddStringProperty("time", "4:00 PM")
- .AddDocumentProperty("sender", person3)
- .AddDocumentProperty("receiver", person2)
- .AddDocumentProperty("cc", person1)
- .Build();
- DocumentProto email_collection =
- DocumentBuilder()
- .SetKey("namespace", "email_collection")
- .SetSchema("EmailCollection")
- .AddDocumentProperty("email1", email1)
- .AddDocumentProperty("email2", email2)
- .AddDocumentProperty("email3", email3)
- .AddDocumentProperty("email4", email4)
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk());
-
- const std::vector<std::string> query_terms = {
- "first1", "last2", "email3@gmail.com", "000-000-001",
- "body", "subject", "2022-08-02", "3\\:00"};
- SearchResultProto expected_document;
- expected_document.mutable_status()->set_code(StatusProto::OK);
- *expected_document.mutable_results()->Add()->mutable_document() =
- email_collection;
- for (const std::string& query_term : query_terms) {
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query(query_term);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(expected_document));
- }
-
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("foo");
- SearchResultProto expected_no_documents;
- expected_no_documents.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
-}
-
-TEST_F(IcingSearchEngineTest, JoinByQualifiedId) {
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Person")
- .AddProperty(PropertyConfigBuilder()
- .SetName("firstName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("lastName")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("emailAddress")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .AddType(SchemaTypeConfigBuilder()
- .SetType("Email")
- .AddProperty(PropertyConfigBuilder()
- .SetName("subject")
- .SetDataTypeString(TERM_MATCH_PREFIX,
- TOKENIZER_PLAIN)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("personQualifiedId")
- .SetDataTypeJoinableString(
- JOINABLE_VALUE_TYPE_QUALIFIED_ID)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
-
- DocumentProto person1 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "person1")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first1")
- .AddStringProperty("lastName", "last1")
- .AddStringProperty("emailAddress", "email1@gmail.com")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(1)
- .Build();
- DocumentProto person2 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "person2")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first2")
- .AddStringProperty("lastName", "last2")
- .AddStringProperty("emailAddress", "email2@gmail.com")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(2)
- .Build();
- DocumentProto person3 =
- DocumentBuilder()
- .SetKey(R"(pkg$db/name#space\\)", "person3")
- .SetSchema("Person")
- .AddStringProperty("firstName", "first3")
- .AddStringProperty("lastName", "last3")
- .AddStringProperty("emailAddress", "email3@gmail.com")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(3)
- .Build();
-
- DocumentProto email1 =
- DocumentBuilder()
- .SetKey("namespace", "email1")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 1")
- .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(3)
- .Build();
- DocumentProto email2 =
- DocumentBuilder()
- .SetKey("namespace", "email2")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 2")
- .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(2)
- .Build();
- DocumentProto email3 =
- DocumentBuilder()
- .SetKey("namespace", "email3")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 3")
- .AddStringProperty("personQualifiedId",
- R"(pkg$db/name\#space\\\\#person3)") // escaped
- .SetCreationTimestampMs(kDefaultCreationTimestampMs)
- .SetScore(1)
- .Build();
-
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
-
- // Parent SearchSpec
- SearchSpecProto search_spec;
- search_spec.set_term_match_type(TermMatchType::PREFIX);
- search_spec.set_query("firstName:first");
-
- // JoinSpec
- JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
- join_spec->set_parent_property_expression(
- std::string(JoinProcessor::kQualifiedIdExpr));
- join_spec->set_child_property_expression("personQualifiedId");
- join_spec->set_aggregation_scoring_strategy(
- JoinSpecProto::AggregationScoringStrategy::MAX);
- JoinSpecProto::NestedSpecProto* nested_spec =
- join_spec->mutable_nested_spec();
- SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
- nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
- nested_search_spec->set_query("subject:test");
- *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
- *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
-
- // Parent ScoringSpec
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
-
- // Parent ResultSpec
- ResultSpecProto result_spec;
- result_spec.set_num_per_page(1);
-
- // Since we:
- // - Use MAX for aggregation scoring strategy.
- // - (Default) use DOCUMENT_SCORE to score child documents.
- // - (Default) use DESC as the ranking order.
- //
- // person1 + email1 should have the highest aggregated score (3) and be
- // returned first. person2 + email2 (aggregated score = 2) should be the
- // second, and person3 + email3 (aggregated score = 1) should be the last.
- SearchResultProto expected_result1;
- expected_result1.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto::ResultProto* result_proto1 =
- expected_result1.mutable_results()->Add();
- *result_proto1->mutable_document() = person1;
- *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1;
-
- SearchResultProto expected_result2;
- expected_result2.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto::ResultProto* result_proto2 =
- expected_result2.mutable_results()->Add();
- *result_proto2->mutable_document() = person2;
- *result_proto2->mutable_joined_results()->Add()->mutable_document() = email2;
-
- SearchResultProto expected_result3;
- expected_result3.mutable_status()->set_code(StatusProto::OK);
- SearchResultProto::ResultProto* result_proto3 =
- expected_result3.mutable_results()->Add();
- *result_proto3->mutable_document() = person3;
- *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3;
-
- SearchResultProto result1 =
- icing.Search(search_spec, scoring_spec, result_spec);
- uint64_t next_page_token = result1.next_page_token();
- EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
- expected_result1.set_next_page_token(next_page_token);
- EXPECT_THAT(result1,
- EqualsSearchResultIgnoreStatsAndScores(expected_result1));
-
- SearchResultProto result2 = icing.GetNextPage(next_page_token);
- next_page_token = result2.next_page_token();
- EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
- expected_result2.set_next_page_token(next_page_token);
- EXPECT_THAT(result2,
- EqualsSearchResultIgnoreStatsAndScores(expected_result2));
-
- SearchResultProto result3 = icing.GetNextPage(next_page_token);
- next_page_token = result3.next_page_token();
- EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
- EXPECT_THAT(result3,
- EqualsSearchResultIgnoreStatsAndScores(expected_result3));
-}
-
-TEST_F(IcingSearchEngineTest, NumericFilterAdvancedQuerySucceeds) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create the schema and document store
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("transaction")
- .AddProperty(PropertyConfigBuilder()
- .SetName("price")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("cost")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document_one = DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 10)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = DocumentBuilder()
- .SetKey("namespace", "2")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 25)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- DocumentProto document_three = DocumentBuilder()
- .SetKey("namespace", "3")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("cost", 2)
- .Build();
- ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("price < 20");
- search_spec.set_search_type(
- SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
- search_spec.add_enabled_features(std::string(kNumericSearchFeature));
-
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
-
- search_spec.set_query("price == 25");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
-
- search_spec.set_query("cost > 2");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.results(), IsEmpty());
-
- search_spec.set_query("cost >= 2");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(1));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_three));
-
- search_spec.set_query("price <= 25");
- results = icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- ASSERT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
-}
-
-TEST_F(IcingSearchEngineTest, NumericFilterOldQueryFails) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Create the schema and document store
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("transaction")
- .AddProperty(PropertyConfigBuilder()
- .SetName("price")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL))
- .AddProperty(PropertyConfigBuilder()
- .SetName("cost")
- .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
- .SetCardinality(CARDINALITY_OPTIONAL)))
- .Build();
- ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
-
- DocumentProto document_one = DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 10)
- .Build();
- ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
-
- DocumentProto document_two = DocumentBuilder()
- .SetKey("namespace", "2")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("price", 25)
- .Build();
- ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
-
- DocumentProto document_three = DocumentBuilder()
- .SetKey("namespace", "3")
- .SetSchema("transaction")
- .SetCreationTimestampMs(1)
- .AddInt64Property("cost", 2)
- .Build();
- ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk());
-
- SearchSpecProto search_spec;
- search_spec.set_query("price < 20");
- search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY);
- search_spec.add_enabled_features(std::string(kNumericSearchFeature));
-
- SearchResultProto results =
- icing.Search(search_spec, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
-}
-
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 6123f47..6608e44 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -70,6 +70,8 @@ namespace lib {
namespace {
+using ::testing::IsTrue;
+
// Creates a fake type config with 10 properties (p0 - p9)
void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
type_config->set_schema_type("Fake_Type");
@@ -79,7 +81,7 @@ void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
property->set_property_name(
IcingStringUtil::StringPrintf("p%d", i)); // p0 - p9
property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
property->mutable_string_indexing_config()->set_tokenizer_type(
@@ -151,10 +153,14 @@ std::unique_ptr<Normalizer> CreateNormalizer() {
.ValueOrDie();
}
-std::unique_ptr<SchemaStore> CreateSchemaStore(const Clock* clock) {
- Filesystem filesystem;
+std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
+ const Clock* clock,
+ const std::string& base_dir) {
+ std::string schema_store_dir = base_dir + "/schema_store_test";
+ filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
+
std::unique_ptr<SchemaStore> schema_store =
- SchemaStore::Create(&filesystem, GetTestTempDir(), clock).ValueOrDie();
+ SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
SchemaProto schema;
CreateFakeTypeConfig(schema.add_types());
@@ -167,8 +173,8 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Clock* clock) {
return schema_store;
}
-void CleanUp(const Filesystem& filesystem, const std::string& index_dir) {
- filesystem.DeleteDirectoryRecursively(index_dir.c_str());
+void CleanUp(const Filesystem& filesystem, const std::string& base_dir) {
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
}
void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
@@ -180,20 +186,26 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IndexProcessor> index_processor,
IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
@@ -210,7 +222,14 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithOneProperty)
->Arg(1000)
@@ -237,20 +256,26 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IndexProcessor> index_processor,
IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
@@ -269,7 +294,14 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithTenProperties)
->Arg(1000)
@@ -296,20 +328,26 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IndexProcessor> index_processor,
IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
@@ -328,7 +366,14 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithDiacriticLetters)
->Arg(1000)
@@ -355,20 +400,26 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
IcingFilesystem icing_filesystem;
Filesystem filesystem;
- std::string index_dir = GetTestTempDir() + "/index_test/";
+ std::string base_dir = GetTestTempDir() + "/index_processor_benchmark";
+ std::string index_dir = base_dir + "/index_test/";
+ std::string integer_index_dir = base_dir + "/integer_index_test/";
- CleanUp(filesystem, index_dir);
+ CleanUp(filesystem, base_dir);
+ ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()),
+ IsTrue());
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
- std::unique_ptr<NumericIndex<int64_t>> integer_index =
- std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<NumericIndex<int64_t>> integer_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
Clock clock;
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, &clock, base_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IndexProcessor> index_processor,
IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
@@ -386,7 +437,14 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
index_processor->IndexDocument(tokenized_document, document_id++));
}
- CleanUp(filesystem, index_dir);
+ index_processor.reset();
+ schema_store.reset();
+ normalizer.reset();
+ language_segmenter.reset();
+ integer_index.reset();
+ index.reset();
+
+ CleanUp(filesystem, base_dir);
}
BENCHMARK(BM_IndexDocumentWithHiragana)
->Arg(1000)
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 04f27a7..626efa7 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -132,6 +132,7 @@ using DataType = PropertyConfigProto::DataType;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::SizeIs;
using ::testing::Test;
@@ -150,12 +151,21 @@ class IndexProcessorTest : public Test {
GetTestFilePath("icing/icu.dat")));
}
- index_dir_ = GetTestTempDir() + "/index_test";
+ base_dir_ = GetTestTempDir() + "/index_processor_test";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ index_dir_ = base_dir_ + "/index";
+ integer_index_dir_ = base_dir_ + "/integer_index";
+ schema_store_dir_ = base_dir_ + "/schema_store";
+
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
- integer_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ integer_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, integer_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -167,12 +177,11 @@ class IndexProcessorTest : public Test {
normalizer_factory::Create(
/*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
- std::string schema_store_dir = GetTestTempDir() + "/schema_store";
ASSERT_TRUE(
- filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()));
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
- SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaProto schema =
SchemaBuilder()
.AddType(
@@ -257,7 +266,14 @@ class IndexProcessorTest : public Test {
}
void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
+ index_processor_.reset();
+ schema_store_.reset();
+ normalizer_.reset();
+ lang_segmenter_.reset();
+ integer_index_.reset();
+ index_.reset();
+
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
}
std::unique_ptr<IcingMockFilesystem> mock_icing_filesystem_;
@@ -265,12 +281,15 @@ class IndexProcessorTest : public Test {
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
FakeClock fake_clock_;
+ std::string base_dir_;
std::string index_dir_;
+ std::string integer_index_dir_;
+ std::string schema_store_dir_;
- std::unique_ptr<LanguageSegmenter> lang_segmenter_;
- std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<Index> index_;
std::unique_ptr<NumericIndex<int64_t>> integer_index_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<IndexProcessor> index_processor_;
};
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 4349cc9..c7b6380 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -79,6 +79,7 @@ class IndexTest : public Test {
}
void TearDown() override {
+ index_.reset();
icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
}
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index 0a3317c..b01f278 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -78,7 +78,7 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->GetTermId(term_));
ICING_ASSIGN_OR_RETURN(uint32_t term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- lite_index_->AppendHits(
+ lite_index_->FetchHits(
term_id, section_restrict_mask_,
/*only_from_prefix_sections=*/false,
/*score_by=*/
@@ -105,7 +105,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
ICING_ASSIGN_OR_RETURN(
uint32_t term_id,
term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
- lite_index_->AppendHits(
+ lite_index_->FetchHits(
term_id, section_restrict_mask_,
/*only_from_prefix_sections=*/!exact_match,
/*score_by=*/
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 1ea945c..bf54dec 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -30,6 +30,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
@@ -114,6 +115,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
uint64_t file_size;
IcingTimer timer;
+ absl_ports::unique_lock l(&mutex_);
if (!lexicon_.CreateIfNotExist(options_.lexicon_options) ||
!lexicon_.Init()) {
return absl_ports::InternalError("Failed to initialize lexicon trie");
@@ -241,6 +243,7 @@ Crc32 LiteIndex::ComputeChecksum() {
libtextclassifier3::Status LiteIndex::Reset() {
IcingTimer timer;
+ absl_ports::unique_lock l(&mutex_);
// TODO(b/140436942): When these components have been changed to return errors
// they should be propagated from here.
lexicon_.Clear();
@@ -253,11 +256,13 @@ libtextclassifier3::Status LiteIndex::Reset() {
}
void LiteIndex::Warm() {
+ absl_ports::shared_lock l(&mutex_);
hit_buffer_.Warm();
lexicon_.Warm();
}
libtextclassifier3::Status LiteIndex::PersistToDisk() {
+ absl_ports::unique_lock l(&mutex_);
bool success = true;
if (!lexicon_.Sync()) {
ICING_VLOG(1) << "Failed to sync the lexicon.";
@@ -279,6 +284,7 @@ void LiteIndex::UpdateChecksum() {
libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
const std::string& term, TermMatchType::Code term_match_type,
NamespaceId namespace_id) {
+ absl_ports::unique_lock l(&mutex_);
uint32_t tvi;
libtextclassifier3::Status status =
lexicon_.Insert(term.c_str(), "", &tvi, false);
@@ -287,13 +293,19 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
<< status.error_message();
return status;
}
- ICING_RETURN_IF_ERROR(UpdateTermProperties(
+ ICING_RETURN_IF_ERROR(UpdateTermPropertiesImpl(
tvi, term_match_type == TermMatchType::PREFIX, namespace_id));
return tvi;
}
libtextclassifier3::Status LiteIndex::UpdateTermProperties(
uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
+ absl_ports::unique_lock l(&mutex_);
+ return UpdateTermPropertiesImpl(tvi, hasPrefixHits, namespace_id);
+}
+
+libtextclassifier3::Status LiteIndex::UpdateTermPropertiesImpl(
+ uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
if (hasPrefixHits &&
!lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) {
return absl_ports::ResourceExhaustedError(
@@ -309,6 +321,7 @@ libtextclassifier3::Status LiteIndex::UpdateTermProperties(
}
libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
+ absl_ports::unique_lock l(&mutex_);
if (is_full()) {
return absl_ports::ResourceExhaustedError("Hit buffer is full!");
}
@@ -329,6 +342,7 @@ libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
const std::string& term) const {
+ absl_ports::shared_lock l(&mutex_);
char dummy;
uint32_t tvi;
if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) {
@@ -338,7 +352,7 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
return tvi;
}
-int LiteIndex::AppendHits(
+int LiteIndex::FetchHits(
uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
@@ -349,9 +363,27 @@ int LiteIndex::AppendHits(
DocumentId last_document_id = kInvalidDocumentId;
// Record whether the last document belongs to the given namespaces.
bool is_last_document_desired = false;
- for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) {
- TermIdHitPair term_id_hit_pair(
- hit_buffer_.array_cast<TermIdHitPair>()[idx]);
+
+ if (NeedSort()) {
+ // Transition from shared_lock in NeedSort to unique_lock here is safe
+ // because it doesn't hurt to sort again if sorting was done already by
+ // another thread after NeedSort is evaluated. NeedSort is called before
+ // sorting to improve concurrency as threads can avoid acquiring the unique
+ // lock if no sorting is needed.
+ absl_ports::unique_lock l(&mutex_);
+ SortHits();
+ }
+
+ // This downgrade from an unique_lock to a shared_lock is safe because we're
+ // searching for the term in the searchable (sorted) section of the HitBuffer
+ // only in Seek().
+ // Any operations that might execute in between the transition of downgrading
+ // the lock here are guaranteed not to alter the searchable section (or the
+ // LiteIndex due to a global lock in IcingSearchEngine).
+ absl_ports::shared_lock l(&mutex_);
+ for (uint32_t idx = Seek(term_id); idx < header_->searchable_end(); idx++) {
+ TermIdHitPair term_id_hit_pair =
+ hit_buffer_.array_cast<TermIdHitPair>()[idx];
if (term_id_hit_pair.term_id() != term_id) break;
const Hit& hit = term_id_hit_pair.hit();
@@ -422,7 +454,7 @@ libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
uint32_t term_id,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker) {
- return AppendHits(term_id, kSectionIdMaskAll,
+ return FetchHits(term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false, score_by,
suggestion_result_checker,
/*hits_out=*/nullptr);
@@ -434,6 +466,7 @@ bool LiteIndex::is_full() const {
}
std::string LiteIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) {
+ absl_ports::unique_lock l(&mutex_);
std::string res;
std::string lexicon_info;
lexicon_.GetDebugInfo(verbosity, &lexicon_info);
@@ -468,6 +501,7 @@ libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
IndexStorageInfoProto LiteIndex::GetStorageInfo(
IndexStorageInfoProto storage_info) const {
+ absl_ports::shared_lock l(&mutex_);
int64_t header_and_hit_buffer_file_size =
filesystem_->GetFileSize(hit_buffer_fd_.get());
storage_info.set_lite_index_hit_buffer_size(
@@ -512,9 +546,7 @@ void LiteIndex::SortHits() {
UpdateChecksum();
}
-uint32_t LiteIndex::Seek(uint32_t term_id) {
- SortHits();
-
+uint32_t LiteIndex::Seek(uint32_t term_id) const {
// Binary search for our term_id. Make sure we get the first
// element. Using kBeginSortValue ensures this for the hit value.
TermIdHitPair term_id_hit_pair(
@@ -522,14 +554,21 @@ uint32_t LiteIndex::Seek(uint32_t term_id) {
const TermIdHitPair::Value* array =
hit_buffer_.array_cast<TermIdHitPair::Value>();
+ if (header_->searchable_end() != header_->cur_size()) {
+ ICING_LOG(WARNING) << "Lite index: hit buffer searchable end != current "
+ << "size during Seek(): "
+ << header_->searchable_end() << " vs "
+ << header_->cur_size();
+ }
const TermIdHitPair::Value* ptr = std::lower_bound(
- array, array + header_->cur_size(), term_id_hit_pair.value());
+ array, array + header_->searchable_end(), term_id_hit_pair.value());
return ptr - array;
}
libtextclassifier3::Status LiteIndex::Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) {
+ absl_ports::unique_lock l(&mutex_);
header_->set_last_added_docid(new_last_added_document_id);
if (header_->cur_size() == 0) {
return libtextclassifier3::Status::OK;
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index e4fb686..c7255fd 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -27,6 +27,8 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/thread_annotations.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
@@ -52,6 +54,10 @@
namespace icing {
namespace lib {
+// The LiteIndex is go/thread-compatible. Operations on the same data member
+// object interfere with each other, unless they are guaranteed not to mutate
+// the object (In the case of LiteIndex, this means all const methods,
+// FetchHits and ScoreHits).
class LiteIndex {
public:
// An entry in the hit buffer.
@@ -72,25 +78,22 @@ class LiteIndex {
// Resets all internal members of the index. Returns OK if all operations were
// successful.
- libtextclassifier3::Status Reset();
+ libtextclassifier3::Status Reset() ICING_LOCKS_EXCLUDED(mutex_);
// Advises the OS to cache pages in the index, which will be accessed for a
// query soon.
- void Warm();
+ void Warm() ICING_LOCKS_EXCLUDED(mutex_);
// Syncs all modified files in the index to disk.
//
// Returns:
// OK on success
// INTERNAL on I/O error
- libtextclassifier3::Status PersistToDisk();
-
- // Calculate the checksum of all sub-components of the LiteIndex
- Crc32 ComputeChecksum();
+ libtextclassifier3::Status PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_);
// Returns term_id if term found, NOT_FOUND otherwise.
libtextclassifier3::StatusOr<uint32_t> GetTermId(
- const std::string& term) const;
+ const std::string& term) const ICING_LOCKS_EXCLUDED(mutex_);
// Returns an iterator for all terms for which 'prefix' is a prefix.
class PrefixIterator {
@@ -109,7 +112,11 @@ class LiteIndex {
IcingDynamicTrie::Iterator delegate_;
};
- PrefixIterator FindTermPrefixes(const std::string& prefix) const {
+ // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate any
+ // previously returned PrefixIterator.
+ PrefixIterator FindTermPrefixes(const std::string& prefix) const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str()));
}
@@ -120,7 +127,7 @@ class LiteIndex {
// RESOURCE_EXHAUSTED if lexicon is full or no disk space is available
libtextclassifier3::StatusOr<uint32_t> InsertTerm(
const std::string& term, TermMatchType::Code term_match_type,
- NamespaceId namespace_id);
+ NamespaceId namespace_id) ICING_LOCKS_EXCLUDED(mutex_);
// Updates term properties by setting hasPrefixHits and namespace id of the
// term.
@@ -130,7 +137,8 @@ class LiteIndex {
// RESOURCE_EXHAUSTED if no disk space is available
libtextclassifier3::Status UpdateTermProperties(uint32_t tvi,
bool hasPrefixHits,
- NamespaceId namespace_id);
+ NamespaceId namespace_id)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Append hit to buffer. term_id must be encoded using the same term_id_codec
// supplied to the index constructor.
@@ -138,7 +146,8 @@ class LiteIndex {
// - OK if hit was successfully added
// - RESOURCE_EXHAUSTED if hit could not be added (either due to hit buffer
// or file system capacity reached).
- libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit);
+ libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Add all hits with term_id from the sections specified in section_id_mask,
// skipping hits in non-prefix sections if only_from_prefix_sections is true,
@@ -147,33 +156,35 @@ class LiteIndex {
// is nullptr.
//
// Only those hits which belongs to the given namespaces will be counted and
- // appended. A nullptr namespace checker will disable this check.
+ // fetched. A nullptr namespace checker will disable this check.
//
// Returns the score of hits that would be added to hits_out according the
// given score_by.
- int AppendHits(
+ int FetchHits(
uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker,
std::vector<DocHitInfo>* hits_out,
- std::vector<Hit::TermFrequencyArray>* term_frequency_out = nullptr);
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out = nullptr)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Returns the hit count of the term.
// Only those hits which belongs to the given namespaces will be counted.
libtextclassifier3::StatusOr<int> ScoreHits(
uint32_t term_id,
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
- const SuggestionResultChecker* suggestion_result_checker);
+ const SuggestionResultChecker* suggestion_result_checker)
+ ICING_LOCKS_EXCLUDED(mutex_);
- // Check if buffer has reached its capacity.
- bool is_full() const;
+ bool empty() const ICING_LOCKS_EXCLUDED(mutex_) { return size() == 0; }
- bool empty() const { return size() == 0; }
-
- uint32_t size() const { return header_->cur_size(); }
+ uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return header_->cur_size();
+ }
- bool WantsMerge() const {
+ bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) {
return size() >= (options_.hit_buffer_want_merge_bytes /
sizeof(TermIdHitPair::Value));
}
@@ -224,11 +235,13 @@ class LiteIndex {
int end_position_;
};
- const_iterator begin() const {
+ const_iterator begin() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
// If the LiteIndex is empty, just return end().
- return empty() ? end()
- : const_iterator(hit_buffer_.array_cast<TermIdHitPair>(), 0,
- header_->cur_size());
+ return empty_impl()
+ ? end()
+ : const_iterator(hit_buffer_.array_cast<TermIdHitPair>(), 0,
+ header_->cur_size());
}
const_iterator end() const { return const_iterator(); }
@@ -240,19 +253,25 @@ class LiteIndex {
// We keep track of the last added document_id. This is always the largest
// document_id that has been added because hits can only be added in order of
// increasing document_id.
- DocumentId last_added_document_id() const {
+ DocumentId last_added_document_id() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
return header_->last_added_docid();
}
- void set_last_added_document_id(DocumentId document_id) const {
+ void set_last_added_document_id(DocumentId document_id)
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
header_->set_last_added_docid(document_id);
}
+ // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate the reference
+ // returned here.
const IcingDynamicTrie& lexicon() const { return lexicon_; }
// Returns debug information for the index in out.
// verbosity = BASIC, simplest debug information - size of lexicon, hit buffer
// verbosity = DETAILED, more detailed debug information from the lexicon.
- std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity);
+ std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Returns the byte size of all the elements held in the index. This excludes
// the size of any internal metadata of the index, e.g. the index's header.
@@ -260,15 +279,16 @@ class LiteIndex {
// Returns:
// Byte size on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const
+ ICING_LOCKS_EXCLUDED(mutex_);
// Takes the provided storage_info, populates the fields related to the lite
// index and returns that storage_info.
//
// If an IO error occurs while trying to calculate the value for a field, then
// that field will be set to -1.
- IndexStorageInfoProto GetStorageInfo(
- IndexStorageInfoProto storage_info) const;
+ IndexStorageInfoProto GetStorageInfo(IndexStorageInfoProto storage_info) const
+ ICING_LOCKS_EXCLUDED(mutex_);
// Reduces internal file sizes by reclaiming space of deleted documents.
//
@@ -281,7 +301,8 @@ class LiteIndex {
// invalid state and should be cleared.
libtextclassifier3::Status Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
- const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id);
+ const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id)
+ ICING_LOCKS_EXCLUDED(mutex_);
private:
static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
@@ -295,45 +316,78 @@ class LiteIndex {
// OK on success
// DATA_LOSS if the index was corrupted and cleared
// INTERNAL on I/O error
- libtextclassifier3::Status Initialize();
+ libtextclassifier3::Status Initialize() ICING_LOCKS_EXCLUDED(mutex_);
+
+ bool initialized() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_ != nullptr;
+ }
+
+ // Check if the hit buffer has reached its capacity.
+ bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_);
- bool initialized() const { return header_ != nullptr; }
+ // Non-locking implementation for empty().
+ bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return size_impl() == 0;
+ }
+
+ // Non-locking implementation for size(); returns the full count as uint32_t, like size().
+ uint32_t size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size();
+ }
+
+ // Calculate the checksum of all sub-components of the LiteIndex
+ Crc32 ComputeChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sets the computed checksum in the header
- void UpdateChecksum();
+ void UpdateChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Non-locking implementation for UpdateTermProperties.
+ libtextclassifier3::Status UpdateTermPropertiesImpl(uint32_t tvi,
+ bool hasPrefixHits,
+ NamespaceId namespace_id)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Whether or not the HitBuffer requires sorting.
+ bool NeedSort() ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return header_->cur_size() - header_->searchable_end() > 0;
+ }
// Sort hits stored in the index.
- void SortHits();
+ void SortHits() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Returns the position of the first element with term_id, or the size of the
- // hit buffer if term_id is not present.
- uint32_t Seek(uint32_t term_id);
+ // Returns the position of the first element with term_id, or the searchable
+ // end of the hit buffer if term_id is not present.
+ uint32_t Seek(uint32_t term_id) const ICING_SHARED_LOCKS_REQUIRED(mutex_);
// File descriptor that points to where the header and hit buffer are written
// to.
- ScopedFd hit_buffer_fd_;
+ ScopedFd hit_buffer_fd_ ICING_GUARDED_BY(mutex_);
// Mmapped region past the header that stores the hits.
- IcingArrayStorage hit_buffer_;
+ IcingArrayStorage hit_buffer_ ICING_GUARDED_BY(mutex_);
// Crc checksum of the hits, excludes the header.
- uint32_t hit_buffer_crc_;
+ uint32_t hit_buffer_crc_ ICING_GUARDED_BY(mutex_);
// Trie that maps indexed terms to their term id
- IcingDynamicTrie lexicon_;
+ IcingDynamicTrie lexicon_ ICING_GUARDED_BY(mutex_);
// TODO(b/140437260): Port over to MemoryMappedFile
// Memory mapped region of the underlying file that reflects the header.
- IcingMMapper header_mmap_;
+ IcingMMapper header_mmap_ ICING_GUARDED_BY(mutex_);
// Wrapper around the mmapped header that contains stats on the lite index.
- std::unique_ptr<LiteIndex_Header> header_;
+ std::unique_ptr<LiteIndex_Header> header_ ICING_GUARDED_BY(mutex_);
// Options used to initialize the LiteIndex.
const Options options_;
// TODO(b/139087650) Move to icing::Filesystem
const IcingFilesystem* const filesystem_;
+
+ // Used to provide reader and writer locks
+ mutable absl_ports::shared_mutex mutex_;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index 2c29640..c3f52b1 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -56,6 +56,8 @@ class LiteIndexTest : public testing::Test {
}
void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
}
@@ -82,7 +84,7 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) {
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
std::vector<DocHitInfo> hits1;
- lite_index_->AppendHits(
+ lite_index_->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
@@ -94,7 +96,7 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) {
std::vector<DocHitInfo> hits2;
AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
- lite_index_->AppendHits(
+ lite_index_->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc
new file mode 100644
index 0000000..7711f92
--- /dev/null
+++ b/icing/index/lite/lite-index_thread-safety_test.cc
@@ -0,0 +1,400 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <array>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/schema/section.h"
+#include "icing/store/suggestion-result-checker.h"
+#include "icing/testing/always-false-suggestion-result-checker-impl.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+// These tests cover concurrent FetchHits operations, as well as interleaving
+ // AddHit and FetchHits operations. Usages of the LiteIndex other than these
+ // scenarios are not guaranteed to be thread-safe, as the LiteIndex is only
+ // go/thread-compatible.
+class LiteIndexThreadSafetyTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ index_dir_ = GetTestTempDir() + "/test_dir";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
+
+ std::string lite_index_file_name =
+ index_dir_ + "/test_file.lite-idx-thread-safety.index";
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+ }
+
+ void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
+ }
+
+ std::string index_dir_;
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+constexpr NamespaceId kNamespace0 = 0;
+constexpr DocumentId kDocumentId0 = 0;
+constexpr DocumentId kDocumentId1 = 1;
+constexpr SectionId kSectionId0 = 1;
+constexpr SectionId kSectionId1 = 0b11;
+
+static constexpr std::array<std::string_view, 100> kCommonWords = {
+ "the", "and", "for", "that", "this", "with",
+ "you", "not", "are", "from", "your", "all",
+ "have", "new", "more", "was", "will", "home",
+ "can", "about", "page", "has", "search", "free",
+ "but", "our", "one", "other", "information", "time",
+ "they", "site", "may", "what", "which", "their",
+ "news", "out", "use", "any", "there", "see",
+ "only", "his", "when", "contact", "here", "business",
+ "who", "web", "also", "now", "help", "get",
+ "view", "online", "first", "been", "would", "how",
+ "were", "services", "some", "these", "click", "its",
+ "like", "service", "than", "find", "price", "date",
+ "back", "top", "people", "had", "list", "name",
+ "just", "over", "state", "year", "day", "into",
+ "email", "two", "health", "world", "next", "used",
+ "work", "last", "most", "products", "music", "buy",
+ "data", "make", "them", "should"};
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_singleTerm) {
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
+
+ // Create kNumThreads threads to call lite_index_->FetchHits()
+ // simultaneously. Each thread should get a valid result of 2 hits for the
+ // term 'foo', and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ };
+ // Spawn threads for FetchHits().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(
+ hits[i],
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId0}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId0})));
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_multipleTerms) {
+ // Add two hits for each of the first 50 terms in kCommonWords.
+ for (int i = 0; i < 50; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[i]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
+ }
+
+ // Create kNumThreads threads to call lite_index_->FetchHits()
+ // simultaneously. Each thread should get a valid result of 2 hits for each
+ // term, and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ lite_index_->FetchHits(
+ term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ };
+
+ // Spawn threads for FetchHits().
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ EXPECT_THAT(
+ hits[i],
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId0}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kSectionId0})));
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, SimultaneousAddHitAndFetchHits_singleTerm) {
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+
+ // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
+ // every odd numbered thread calls AddHit.
+ // Each AddHit operation adds the term 'foo' to a new section of the same doc.
+ // Each query result should contain one hit, and there should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ if (thread_id % 2 == 0) {
+ // Even-numbered thread calls FetchHits.
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ Hit doc_hit(/*section_id=*/thread_id / 2, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ // All AddHit operations add 'foo' to the same document, so there should
+ // only be one DocHitInfo per run.
+ if (i % 2 == 0) {
+ EXPECT_THAT(hits[i], SizeIs(1));
+ EXPECT_THAT(hits[i].back().document_id(), Eq(0));
+ }
+ }
+
+ // After all threads have executed, hits should come from sections 0-24.
+ std::vector<DocHitInfo> final_hits;
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &final_hits);
+ EXPECT_THAT(final_hits, SizeIs(1));
+ EXPECT_THAT(final_hits.back().document_id(), Eq(0));
+ // Section mask of sections 0-24.
+ EXPECT_THAT(final_hits.back().hit_section_ids_mask(), Eq((1 << 25) - 1));
+}
+
+TEST_F(LiteIndexThreadSafetyTest,
+ SimultaneousAddHitAndFetchHits_multipleTerms) {
+ // Add the initial hit 'foo'.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
+
+ // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
+ // every odd numbered thread calls AddHit.
+ // Each AddHit operation adds a different term to a new doc.
+ // Queries always search for the term 'foo' added above so there will always
+ // be a hit. There should be no crash.
+ constexpr int kNumThreads = 50;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ // Create new tvi and term_id for new term kCommonWords[thread_id].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ if (thread_id % 2 == 0) {
+ // Even-numbered thread calls FetchHits.
+ lite_index_->FetchHits(
+ foo_term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Odd-numbered thread calls AddHit.
+ // AddHit to section kSectionId0 of a new doc.
+ Hit doc_hit(/*section_id=*/kSectionId0, /*document_id=*/thread_id / 2,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
+ }
+ };
+
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify results. Queries always search for the term 'foo'
+ // so there will always be a hit
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (i % 2 == 0) {
+ EXPECT_THAT(hits[i],
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId0})));
+ }
+ }
+}
+
+TEST_F(LiteIndexThreadSafetyTest, ManyAddHitAndOneFetchHits_multipleTerms) {
+ // Add two hits for each of the first 20 terms in kCommonWords.
+ for (int i = 0; i < 20; ++i) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[i]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ Hit doc_hit1(/*section_id=*/kSectionId1, /*document_id=*/kDocumentId0,
+ Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
+ }
+
+ // Create kNumThreads threads. Every fifth thread (thread_id % 5 == 0) calls
+ // FetchHits; the other four threads in each group of five call AddHit.
+ // Each AddHit operation adds a hit for the term kCommonWords[thread_id / 5]
+ // to section (thread_id % 5 + 1) of doc 0. Each query searches for the term
+ // kCommonWords[thread_id / 5], which was given hits above. There should be no crash.
+ constexpr int kNumThreads = 100;
+ std::vector<std::vector<DocHitInfo>> hits(kNumThreads);
+ auto callable = [&](int thread_id) {
+ // Get the tvi and term_id for the term kCommonWords[thread_id / 5].
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm(std::string(kCommonWords[thread_id / 5]),
+ TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ if (thread_id % 5 == 0) {
+ // Call FetchHits on term kCommonWords[thread_id / 5]
+ lite_index_->FetchHits(
+ term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits[thread_id]);
+ } else {
+ // Every other thread (thread_id % 5 != 0) calls AddHit.
+ // AddHit to section (thread_id % 5 + 1) of doc 0.
+ Hit doc_hit(/*section_id=*/thread_id % 5 + 1,
+ /*document_id=*/kDocumentId0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
+ }
+ };
+ // Spawn threads.
+ std::vector<std::thread> thread_objs;
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs.emplace_back(callable, /*thread_id=*/i);
+ }
+
+ // Join threads and verify FetchHits results.
+ // Every query should see a hit in doc 0 for sections kSectionId0 and kSectionId1.
+ // Additional hits might also be found in sections 2-5 depending on thread execution order.
+ for (int i = 0; i < kNumThreads; ++i) {
+ thread_objs[i].join();
+ if (i % 5 == 0) {
+ EXPECT_THAT(hits[i], SizeIs(1));
+ EXPECT_THAT(hits[i].back().document_id(), Eq(0));
+ EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Ge(0b11));
+ EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Le(0b1111111));
+ }
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index c16c5d1..816d46e 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -100,6 +100,8 @@ class MainIndexTest : public testing::Test {
}
void TearDown() override {
+ term_id_codec_.reset();
+ lite_index_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
}
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index a1d20f8..1b7b5ae 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -29,6 +29,8 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -36,43 +38,54 @@
#include "icing/index/numeric/numeric-index.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+// DummyNumericIndex: dummy class to help with testing and unblock e2e
+// integration for numeric search. It stores all numeric index data (keys and
+// hits) in memory without any actual persistent storage. None of the
+// PersistentStorage features work as expected, i.e. they don't persist any
+// data to disk, so all data is volatile.
template <typename T>
class DummyNumericIndex : public NumericIndex<T> {
public:
+ static libtextclassifier3::StatusOr<std::unique_ptr<DummyNumericIndex<T>>>
+ Create(const Filesystem& filesystem, std::string working_path) {
+ auto dummy_numeric_index = std::unique_ptr<DummyNumericIndex<T>>(
+ new DummyNumericIndex<T>(filesystem, std::move(working_path)));
+ ICING_RETURN_IF_ERROR(dummy_numeric_index->InitializeNewStorage());
+ return dummy_numeric_index;
+ }
+
~DummyNumericIndex() override = default;
std::unique_ptr<typename NumericIndex<T>::Editor> Edit(
- std::string_view property_name, DocumentId document_id,
+ std::string_view property_path, DocumentId document_id,
SectionId section_id) override {
- return std::make_unique<Editor>(property_name, document_id, section_id,
+ return std::make_unique<Editor>(property_path, document_id, section_id,
storage_);
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- std::string_view property_name, T key_lower, T key_upper) const override;
+ std::string_view property_path, T key_lower, T key_upper) const override;
libtextclassifier3::Status Reset() override {
storage_.clear();
return libtextclassifier3::Status::OK;
}
- libtextclassifier3::Status PersistToDisk() override {
- return libtextclassifier3::Status::OK;
- }
-
private:
class Editor : public NumericIndex<T>::Editor {
public:
explicit Editor(
- std::string_view property_name, DocumentId document_id,
+ std::string_view property_path, DocumentId document_id,
SectionId section_id,
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>>&
storage)
- : NumericIndex<T>::Editor(property_name, document_id, section_id),
+ : NumericIndex<T>::Editor(property_path, document_id, section_id),
storage_(storage) {}
~Editor() override = default;
@@ -147,20 +160,46 @@ class DummyNumericIndex : public NumericIndex<T> {
DocHitInfo doc_hit_info_;
};
+ private:
+ explicit DummyNumericIndex(const Filesystem& filesystem,
+ std::string&& working_path)
+ : NumericIndex<T>(filesystem, std::move(working_path),
+ PersistentStorage::WorkingPathType::kDummy) {}
+
+ libtextclassifier3::Status PersistStoragesToDisk() override {
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status PersistMetadataToDisk() override {
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override {
+ return Crc32(0);
+ }
+
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override {
+ return Crc32(0);
+ }
+
+ PersistentStorage::Crcs& crcs() override { return dummy_crcs_; }
+ const PersistentStorage::Crcs& crcs() const override { return dummy_crcs_; }
+
std::unordered_map<std::string, std::map<T, std::vector<BasicHit>>> storage_;
+ PersistentStorage::Crcs dummy_crcs_;
};
template <typename T>
libtextclassifier3::Status
DummyNumericIndex<T>::Editor::IndexAllBufferedKeys() {
- auto property_map_iter = storage_.find(this->property_name_);
+ auto property_map_iter = storage_.find(this->property_path_);
if (property_map_iter == storage_.end()) {
const auto& [inserted_iter, insert_result] =
- storage_.insert({this->property_name_, {}});
+ storage_.insert({this->property_path_, {}});
if (!insert_result) {
return absl_ports::InternalError(
absl_ports::StrCat("Failed to create a new map for property \"",
- this->property_name_, "\""));
+ this->property_path_, "\""));
}
property_map_iter = inserted_iter;
}
@@ -207,17 +246,17 @@ libtextclassifier3::Status DummyNumericIndex<T>::Iterator::Advance() {
template <typename T>
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-DummyNumericIndex<T>::GetIterator(std::string_view property_name, T key_lower,
+DummyNumericIndex<T>::GetIterator(std::string_view property_path, T key_lower,
T key_upper) const {
if (key_lower > key_upper) {
return absl_ports::InvalidArgumentError(
"key_lower should not be greater than key_upper");
}
- auto property_map_iter = storage_.find(std::string(property_name));
+ auto property_map_iter = storage_.find(std::string(property_path));
if (property_map_iter == storage_.end()) {
return absl_ports::NotFoundError(
- absl_ports::StrCat("Property \"", property_name, "\" not found"));
+ absl_ports::StrCat("Property \"", property_path, "\" not found"));
}
std::vector<typename Iterator::BucketInfo> bucket_info_vec;
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index fa8fa3e..0233b38 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -48,102 +48,26 @@ namespace lib {
namespace {
-// Helper function to PWrite crcs and info to metadata_file_path.
-libtextclassifier3::Status WriteMetadata(
- const Filesystem& filesystem, const std::string& metadata_file_path,
- const IntegerIndexStorage::Crcs* crcs,
- const IntegerIndexStorage::Info* info) {
- ScopedFd sfd(filesystem.OpenForWrite(metadata_file_path.c_str()));
- if (!sfd.is_valid()) {
- return absl_ports::InternalError("Failed to create metadata file");
- }
-
- // Write crcs and info. File layout: <Crcs><Info>
- ICING_RETURN_IF_ERROR(crcs->Serialize(filesystem, sfd.get()));
- ICING_RETURN_IF_ERROR(info->Serialize(filesystem, sfd.get()));
-
- return libtextclassifier3::Status::OK;
-}
-
-// Helper function to update checksums from info and storages to a Crcs
-// instance.
-libtextclassifier3::Status UpdateChecksums(
- IntegerIndexStorage::Crcs* crcs, IntegerIndexStorage::Info* info,
- FileBackedVector<IntegerIndexStorage::Bucket>* sorted_buckets,
- FileBackedVector<IntegerIndexStorage::Bucket>* unsorted_buckets,
- FlashIndexStorage* flash_index_storage) {
- // Compute crcs
- ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
- sorted_buckets->ComputeChecksum());
- ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
- unsorted_buckets->ComputeChecksum());
-
- crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
- crcs->component_crcs.sorted_buckets_crc = sorted_buckets_crc.Get();
- crcs->component_crcs.unsorted_buckets_crc = unsorted_buckets_crc.Get();
- // TODO(b/259744228): implement and update flash_index_storage checksum
- crcs->component_crcs.flash_index_storage_crc = 0;
- crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
-
- return libtextclassifier3::Status::OK;
-}
-
-// Helper function to validate checksums.
-libtextclassifier3::Status ValidateChecksums(
- const IntegerIndexStorage::Crcs* crcs,
- const IntegerIndexStorage::Info* info,
- FileBackedVector<IntegerIndexStorage::Bucket>* sorted_buckets,
- FileBackedVector<IntegerIndexStorage::Bucket>* unsorted_buckets,
- FlashIndexStorage* flash_index_storage) {
- if (crcs->all_crc != crcs->component_crcs.ComputeChecksum().Get()) {
- return absl_ports::FailedPreconditionError(
- "Invalid all crc for IntegerIndexStorage");
- }
-
- if (crcs->component_crcs.info_crc != info->ComputeChecksum().Get()) {
- return absl_ports::FailedPreconditionError(
- "Invalid info crc for IntegerIndexStorage");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
- sorted_buckets->ComputeChecksum());
- if (crcs->component_crcs.sorted_buckets_crc != sorted_buckets_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with IntegerIndexStorage sorted buckets");
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
- unsorted_buckets->ComputeChecksum());
- if (crcs->component_crcs.unsorted_buckets_crc != unsorted_buckets_crc.Get()) {
- return absl_ports::FailedPreconditionError(
- "Mismatch crc with IntegerIndexStorage unsorted buckets");
- }
-
- // TODO(b/259744228): implement and verify flash_index_storage checksum
-
- return libtextclassifier3::Status::OK;
-}
-
// The following 4 methods are helper functions to get the correct file path of
// metadata/sorted_buckets/unsorted_buckets/flash_index_storage, according to
// the given working directory.
-std::string GetMetadataFilePath(std::string_view working_dir) {
- return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
".m");
}
-std::string GetSortedBucketsFilePath(std::string_view working_dir) {
- return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+std::string GetSortedBucketsFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
".s");
}
-std::string GetUnsortedBucketsFilePath(std::string_view working_dir) {
- return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+std::string GetUnsortedBucketsFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
".u");
}
-std::string GetFlashIndexStorageFilePath(std::string_view working_dir) {
- return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+std::string GetFlashIndexStorageFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix,
".f");
}
@@ -358,7 +282,7 @@ bool IntegerIndexStorage::Options::IsValid() const {
return false;
}
std::sort(buckets.begin(), buckets.end());
- int64_t expected_lower = std::numeric_limits<int64_t>::min();
+ int64_t prev_upper = std::numeric_limits<int64_t>::min();
for (int i = 0; i < buckets.size(); ++i) {
// key_lower should not be greater than key_upper and init bucket should
// have invalid posting list identifier.
@@ -367,46 +291,46 @@ bool IntegerIndexStorage::Options::IsValid() const {
return false;
}
- if (buckets[i].key_lower() != expected_lower) {
+ // Previous upper bound should not be INT64_MAX since it is not the last
+ // bucket. This check also keeps prev_upper + 1 below from overflowing.
+ if (prev_upper == std::numeric_limits<int64_t>::max()) {
return false;
}
- // If it is the last bucket, then key_upper should be INT64_MAX. Otherwise
- // it should not be INT64_MAX. Use XOR for this logic.
- if ((buckets[i].key_upper() == std::numeric_limits<int64_t>::max()) ^
- (i == buckets.size() - 1)) {
+ int64_t expected_lower =
+ (i == 0 ? std::numeric_limits<int64_t>::min() : prev_upper + 1);
+ if (buckets[i].key_lower() != expected_lower) {
return false;
}
- expected_lower = buckets[i].key_upper() + 1;
+
+ prev_upper = buckets[i].key_upper();
}
- return true;
+ return prev_upper == std::numeric_limits<int64_t>::max();
}
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
IntegerIndexStorage::Create(
- const Filesystem& filesystem, std::string_view base_dir, Options options,
+ const Filesystem& filesystem, std::string working_path, Options options,
PostingListIntegerIndexSerializer* posting_list_serializer) {
if (!options.IsValid()) {
return absl_ports::InvalidArgumentError(
"Invalid IntegerIndexStorage options");
}
- std::string working_dir = absl_ports::StrCat(base_dir, "/", kSubDirectory);
- if (!filesystem.FileExists(GetMetadataFilePath(working_dir).c_str()) ||
- !filesystem.FileExists(GetSortedBucketsFilePath(working_dir).c_str()) ||
- !filesystem.FileExists(GetUnsortedBucketsFilePath(working_dir).c_str()) ||
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(GetSortedBucketsFilePath(working_path).c_str()) ||
!filesystem.FileExists(
- GetFlashIndexStorageFilePath(working_dir).c_str())) {
- // Delete working_dir if any of them is missing, and reinitialize.
- if (!filesystem.DeleteDirectoryRecursively(working_dir.c_str())) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to delete directory: ", working_dir));
- }
- return InitializeNewFiles(filesystem, std::move(working_dir),
+ GetUnsortedBucketsFilePath(working_path).c_str()) ||
+ !filesystem.FileExists(
+ GetFlashIndexStorageFilePath(working_path).c_str())) {
+ // Discard working_path if any of them is missing, and reinitialize.
+ ICING_RETURN_IF_ERROR(
+ PersistentStorage::Discard(filesystem, working_path, kWorkingPathType));
+ return InitializeNewFiles(filesystem, std::move(working_path),
std::move(options), posting_list_serializer);
}
- return InitializeExistingFiles(filesystem, std::move(working_dir),
+ return InitializeExistingFiles(filesystem, std::move(working_path),
std::move(options), posting_list_serializer);
}
@@ -414,7 +338,7 @@ IntegerIndexStorage::~IntegerIndexStorage() {
if (!PersistToDisk().ok()) {
ICING_LOG(WARNING)
<< "Failed to persist hash map to disk while destructing "
- << working_dir_;
+ << working_path_;
}
}
@@ -645,33 +569,15 @@ IntegerIndexStorage::GetIterator(int64_t query_key_lower,
query_key_lower, query_key_upper, std::move(bucket_pl_iters)));
}
-libtextclassifier3::Status IntegerIndexStorage::PersistToDisk() {
- ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
- ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
- if (!flash_index_storage_->PersistToDisk()) {
- return absl_ports::InternalError(
- "Fail to persist FlashIndexStorage to disk");
- }
-
- ICING_RETURN_IF_ERROR(UpdateChecksums(crcs(), info(), sorted_buckets_.get(),
- unsorted_buckets_.get(),
- flash_index_storage_.get()));
- // Changes should have been applied to the underlying file when using
- // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
- // extra safety step to ensure they are written out.
- ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
-
- return libtextclassifier3::Status::OK;
-}
-
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
IntegerIndexStorage::InitializeNewFiles(
- const Filesystem& filesystem, std::string&& working_dir, Options&& options,
+ const Filesystem& filesystem, std::string&& working_path, Options&& options,
PostingListIntegerIndexSerializer* posting_list_serializer) {
+ // IntegerIndexStorage uses working_path as working directory path.
// Create working directory.
- if (!filesystem.CreateDirectoryRecursively(working_dir.c_str())) {
+ if (!filesystem.CreateDirectory(working_path.c_str())) {
return absl_ports::InternalError(
- absl_ports::StrCat("Failed to create directory: ", working_dir));
+ absl_ports::StrCat("Failed to create directory: ", working_path));
}
// TODO(b/259743562): [Optimization 1] decide max # buckets, unsorted buckets
@@ -683,7 +589,7 @@ IntegerIndexStorage::InitializeNewFiles(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
FileBackedVector<Bucket>::Create(
- filesystem, GetSortedBucketsFilePath(working_dir),
+ filesystem, GetSortedBucketsFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
pre_mapping_mmap_size));
@@ -694,14 +600,14 @@ IntegerIndexStorage::InitializeNewFiles(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
FileBackedVector<Bucket>::Create(
- filesystem, GetUnsortedBucketsFilePath(working_dir),
+ filesystem, GetUnsortedBucketsFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
pre_mapping_mmap_size));
// Initialize flash_index_storage
ICING_ASSIGN_OR_RETURN(
FlashIndexStorage flash_index_storage,
- FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_dir),
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
&filesystem, posting_list_serializer));
if (options.HasCustomInitBuckets()) {
@@ -736,47 +642,45 @@ IntegerIndexStorage::InitializeNewFiles(
}
ICING_RETURN_IF_ERROR(sorted_buckets->PersistToDisk());
- // Create and initialize new info
- Info new_info;
- new_info.magic = Info::kMagic;
- new_info.num_keys = 0;
-
- // Compute checksums
- Crcs new_crcs;
- ICING_RETURN_IF_ERROR(
- UpdateChecksums(&new_crcs, &new_info, sorted_buckets.get(),
- unsorted_buckets.get(), &flash_index_storage));
-
- const std::string metadata_file_path = GetMetadataFilePath(working_dir);
- // Write new metadata file
- ICING_RETURN_IF_ERROR(
- WriteMetadata(filesystem, metadata_file_path, &new_crcs, &new_info));
-
- // Mmap the content of the crcs and info.
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
ICING_ASSIGN_OR_RETURN(
MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(filesystem, metadata_file_path,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
/*max_file_size=*/kMetadataFileSize,
/*pre_mapping_file_offset=*/0,
/*pre_mapping_mmap_size=*/kMetadataFileSize));
-
- return std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
- filesystem, std::move(working_dir), std::move(options),
- posting_list_serializer,
- std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
- std::move(sorted_buckets), std::move(unsorted_buckets),
- std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_integer_index_storage =
+ std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+ filesystem, std::move(working_path), std::move(options),
+ posting_list_serializer,
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(sorted_buckets), std::move(unsorted_buckets),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Initialize info content by writing mapped memory directly.
+ Info& info_ref = new_integer_index_storage->info();
+ info_ref.magic = Info::kMagic;
+ info_ref.num_keys = 0;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_integer_index_storage->InitializeNewStorage());
+
+ return new_integer_index_storage;
}
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
IntegerIndexStorage::InitializeExistingFiles(
- const Filesystem& filesystem, std::string&& working_dir, Options&& options,
+ const Filesystem& filesystem, std::string&& working_path, Options&& options,
PostingListIntegerIndexSerializer* posting_list_serializer) {
// Mmap the content of the crcs and info.
ICING_ASSIGN_OR_RETURN(
MemoryMappedFile metadata_mmapped_file,
- MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_dir),
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
/*max_file_size=*/kMetadataFileSize,
/*pre_mapping_file_offset=*/0,
@@ -791,7 +695,7 @@ IntegerIndexStorage::InitializeExistingFiles(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
FileBackedVector<Bucket>::Create(
- filesystem, GetSortedBucketsFilePath(working_dir),
+ filesystem, GetSortedBucketsFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
pre_mapping_mmap_size));
@@ -802,31 +706,67 @@ IntegerIndexStorage::InitializeExistingFiles(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
FileBackedVector<Bucket>::Create(
- filesystem, GetUnsortedBucketsFilePath(working_dir),
+ filesystem, GetUnsortedBucketsFilePath(working_path),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
pre_mapping_mmap_size));
// Initialize flash_index_storage
ICING_ASSIGN_OR_RETURN(
FlashIndexStorage flash_index_storage,
- FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_dir),
+ FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
&filesystem, posting_list_serializer));
- Crcs* crcs_ptr = reinterpret_cast<Crcs*>(
- metadata_mmapped_file.mutable_region() + Crcs::kFileOffset);
- Info* info_ptr = reinterpret_cast<Info*>(
- metadata_mmapped_file.mutable_region() + Info::kFileOffset);
- // Validate checksums of info and 3 storages.
- ICING_RETURN_IF_ERROR(
- ValidateChecksums(crcs_ptr, info_ptr, sorted_buckets.get(),
- unsorted_buckets.get(), &flash_index_storage));
-
- return std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
- filesystem, std::move(working_dir), std::move(options),
- posting_list_serializer,
- std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
- std::move(sorted_buckets), std::move(unsorted_buckets),
- std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Create instance.
+ auto integer_index_storage =
+ std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+ filesystem, std::move(working_path), std::move(options),
+ posting_list_serializer,
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(sorted_buckets), std::move(unsorted_buckets),
+ std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(integer_index_storage->InitializeExistingStorage());
+
+ // Validate other values of info and options.
+ // Magic should be consistent with the codebase.
+ if (integer_index_storage->info().magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return integer_index_storage;
+}
+
+libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk() {
+ ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
+ if (!flash_index_storage_->PersistToDisk()) {
+ return absl_ports::InternalError(
+ "Fail to persist FlashIndexStorage to disk");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndexStorage::PersistMetadataToDisk() {
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndexStorage::ComputeInfoChecksum() {
+ return info().ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32>
+IntegerIndexStorage::ComputeStoragesChecksum() {
+ // Compute crcs
+ ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
+ sorted_buckets_->ComputeChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
+ unsorted_buckets_->ComputeChecksum());
+
+ // TODO(b/259744228): implement and include flash_index_storage checksum
+ return Crc32(sorted_buckets_crc.Get() ^ unsorted_buckets_crc.Get());
}
libtextclassifier3::StatusOr<std::vector<IntegerIndexStorage::Bucket>>
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
index 562060b..bef8282 100644
--- a/icing/index/numeric/integer-index-storage.h
+++ b/icing/index/numeric/integer-index-storage.h
@@ -26,6 +26,7 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -70,66 +71,14 @@ namespace lib {
// choose sorted/unsorted bucket array.
// - Then we do binary search on the sorted bucket array and sequential search
// on the unsorted bucket array.
-class IntegerIndexStorage {
+class IntegerIndexStorage : public PersistentStorage {
public:
- // Crcs and Info will be written into the metadata file.
- // File layout: <Crcs><Info>
- // Crcs
- struct Crcs {
- static constexpr int32_t kFileOffset = 0;
-
- struct ComponentCrcs {
- uint32_t info_crc;
- uint32_t sorted_buckets_crc;
- uint32_t unsorted_buckets_crc;
- uint32_t flash_index_storage_crc;
-
- bool operator==(const ComponentCrcs& other) const {
- return info_crc == other.info_crc &&
- sorted_buckets_crc == other.sorted_buckets_crc &&
- unsorted_buckets_crc == other.unsorted_buckets_crc &&
- flash_index_storage_crc == other.flash_index_storage_crc;
- }
-
- Crc32 ComputeChecksum() const {
- return Crc32(std::string_view(reinterpret_cast<const char*>(this),
- sizeof(ComponentCrcs)));
- }
- } __attribute__((packed));
-
- libtextclassifier3::Status Serialize(const Filesystem& filesystem,
- int fd) const {
- if (!filesystem.PWrite(fd, kFileOffset, this, sizeof(*this))) {
- return absl_ports::InternalError("Failed to write crcs into file");
- }
- return libtextclassifier3::Status::OK;
- }
-
- bool operator==(const Crcs& other) const {
- return all_crc == other.all_crc && component_crcs == other.component_crcs;
- }
-
- uint32_t all_crc;
- ComponentCrcs component_crcs;
- } __attribute__((packed));
- static_assert(sizeof(Crcs) == 20, "");
-
- // Info
struct Info {
- static constexpr int32_t kFileOffset = static_cast<int32_t>(sizeof(Crcs));
static constexpr int32_t kMagic = 0xc4bf0ccc;
int32_t magic;
int32_t num_keys;
- libtextclassifier3::Status Serialize(const Filesystem& filesystem,
- int fd) const {
- if (!filesystem.PWrite(fd, kFileOffset, this, sizeof(*this))) {
- return absl_ports::InternalError("Failed to write info into file");
- }
- return libtextclassifier3::Status::OK;
- }
-
Crc32 ComputeChecksum() const {
return Crc32(
std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
@@ -137,9 +86,6 @@ class IntegerIndexStorage {
} __attribute__((packed));
static_assert(sizeof(Info) == 8, "");
- static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
- static_assert(kMetadataFileSize == 28);
-
// Bucket
class Bucket {
public:
@@ -219,19 +165,34 @@ class IntegerIndexStorage {
std::vector<Bucket> custom_init_unsorted_buckets;
};
- static constexpr std::string_view kSubDirectory = "storage_dir";
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
+
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
static constexpr std::string_view kFilePrefix = "integer_index_storage";
- // Creates a new IntegerIndexStorage instance to index integers. For directory
- // management purpose, we define working_dir as "<base_dir>/storage_dir", and
- // all underlying files will be stored under it. If any of the underlying file
- // is missing, then delete the whole working_dir and (re)initialize with new
- // ones. Otherwise initialize and create the instance by existing files.
+ // Creates a new IntegerIndexStorage instance to index integers (for a single
+ // property). If any of the underlying files is missing, then delete the
+ // whole working_path and (re)initialize with new ones. Otherwise initialize
+ // and create the instance from the existing files.
//
// filesystem: Object to make system level calls
- // base_dir: Specifies the base directory for all integer index data related
- // files to be stored. As mentioned above, all files will be stored
- // under working_dir (which is "<base_dir>/storage_dir").
+ // working_path: Specifies the working path for PersistentStorage.
+ // IntegerIndexStorage uses working path as working directory
+ // and all related files will be stored under this directory. It
+ // takes full ownership of working_path_, including
+ // creation/deletion. It is the caller's responsibility to
+ // specify correct working path and avoid mixing different
+ // persistent storages together under the same path. Also the
+ // caller has the ownership for the parent directory of
+ // working_path_, and it is responsible for parent directory
+ // creation/deletion. See PersistentStorage for more details
+ // about the concept of working_path.
// options: Options instance.
// posting_list_serializer: a PostingListIntegerIndexSerializer instance to
// serialize/deserialize integer index data to/from
@@ -244,10 +205,21 @@ class IntegerIndexStorage {
// - INTERNAL_ERROR on I/O errors.
// - Any FileBackedVector/FlashIndexStorage errors.
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- Create(const Filesystem& filesystem, std::string_view base_dir,
+ Create(const Filesystem& filesystem, std::string working_path,
Options options,
PostingListIntegerIndexSerializer* posting_list_serializer);
+ // Deletes IntegerIndexStorage under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
// Delete copy and move constructor/assignment operator.
IntegerIndexStorage(const IntegerIndexStorage&) = delete;
IntegerIndexStorage& operator=(const IntegerIndexStorage&) = delete;
@@ -255,7 +227,7 @@ class IntegerIndexStorage {
IntegerIndexStorage(IntegerIndexStorage&&) = delete;
IntegerIndexStorage& operator=(IntegerIndexStorage&&) = delete;
- ~IntegerIndexStorage();
+ ~IntegerIndexStorage() override;
// Batch adds new keys (of the same DocumentId and SectionId) into the integer
// index storage.
@@ -286,36 +258,17 @@ class IntegerIndexStorage {
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
int64_t query_key_lower, int64_t query_key_upper) const;
- // Flushes content to underlying files.
- //
- // Returns:
- // - OK on success
- // - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistToDisk();
-
private:
- static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- InitializeNewFiles(
- const Filesystem& filesystem, std::string&& working_dir,
- Options&& options,
- PostingListIntegerIndexSerializer* posting_list_serializer);
-
- static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- InitializeExistingFiles(
- const Filesystem& filesystem, std::string&& working_dir,
- Options&& options,
- PostingListIntegerIndexSerializer* posting_list_serializer);
-
explicit IntegerIndexStorage(
- const Filesystem& filesystem, std::string&& working_dir,
+ const Filesystem& filesystem, std::string&& working_path,
Options&& options,
PostingListIntegerIndexSerializer* posting_list_serializer,
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
std::unique_ptr<FlashIndexStorage> flash_index_storage)
- : filesystem_(filesystem),
- working_dir_(std::move(working_dir)),
+ : PersistentStorage(filesystem, std::move(working_path),
+ kWorkingPathType),
options_(std::move(options)),
posting_list_serializer_(posting_list_serializer),
metadata_mmapped_file_(std::move(metadata_mmapped_file)),
@@ -323,6 +276,46 @@ class IntegerIndexStorage {
unsorted_buckets_(std::move(unsorted_buckets)),
flash_index_storage_(std::move(flash_index_storage)) {}
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeNewFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk() override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk() override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+
+ // Computes and returns all storages checksum. Checksums of sorted_buckets_
+ // and unsorted_buckets_ will be combined together by XOR. The checksum of
+ // flash_index_storage_ is not included yet (see TODO(b/259744228)).
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+
// Helper function to add keys in range [it_start, it_end) into the given
// bucket. It handles the bucket and its corresponding posting list(s) to make
// searching and indexing efficient.
@@ -352,23 +345,25 @@ class IntegerIndexStorage {
const std::vector<int64_t>::const_iterator& it_end,
FileBackedVector<Bucket>::MutableView& mutable_bucket);
- Crcs* crcs() {
- return reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
- Crcs::kFileOffset);
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
}
- Info* info() {
- return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
- Info::kFileOffset);
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
}
- const Info* info() const {
- return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
- Info::kFileOffset);
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
}
- const Filesystem& filesystem_;
- std::string working_dir_;
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
Options options_;
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
index 0afc96b..92fb912 100644
--- a/icing/index/numeric/integer-index-storage_test.cc
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -25,6 +25,8 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/persistent-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -48,9 +50,10 @@ using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
using ::testing::Ne;
+using ::testing::Not;
using Bucket = IntegerIndexStorage::Bucket;
-using Crcs = IntegerIndexStorage::Crcs;
+using Crcs = PersistentStorage::Crcs;
using Info = IntegerIndexStorage::Info;
using Options = IntegerIndexStorage::Options;
@@ -61,7 +64,11 @@ static constexpr SectionId kDefaultSectionId = 31;
class IntegerIndexStorageTest : public ::testing::Test {
protected:
void SetUp() override {
- base_dir_ = GetTestTempDir() + "/integer_index_storage_test";
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/integer_index_storage_test";
serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
}
@@ -73,6 +80,7 @@ class IntegerIndexStorageTest : public ::testing::Test {
Filesystem filesystem_;
std::string base_dir_;
+ std::string working_path_;
std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
};
@@ -191,9 +199,10 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) {
IsFalse());
}
-TEST_F(IntegerIndexStorageTest, InvalidBaseDir) {
- EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, "/dev/null", Options(),
- serializer_.get()),
+TEST_F(IntegerIndexStorageTest, InvalidWorkingPath) {
+ EXPECT_THAT(IntegerIndexStorage::Create(
+ filesystem_, "/dev/null/integer_index_storage_test",
+ Options(), serializer_.get()),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -205,7 +214,7 @@ TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) {
Bucket(std::numeric_limits<int64_t>::min(), -100)});
ASSERT_THAT(invalid_options.IsValid(), IsFalse());
- EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, base_dir_,
+ EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_,
invalid_options, serializer_.get()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -213,10 +222,10 @@ TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) {
TEST_F(IntegerIndexStorageTest, InitializeNewFiles) {
{
// Create new integer index storage
- ASSERT_FALSE(filesystem_.DirectoryExists(base_dir_.c_str()));
+ ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
ICING_ASSERT_OK(storage->PersistToDisk());
@@ -224,29 +233,25 @@ TEST_F(IntegerIndexStorageTest, InitializeNewFiles) {
// Metadata file should be initialized correctly for both info and crcs
// sections.
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
- "/", IntegerIndexStorage::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
// Check info section
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
+ IntegerIndexStorage::kInfoMetadataFileOffset));
EXPECT_THAT(info.magic, Eq(Info::kMagic));
EXPECT_THAT(info.num_keys, Eq(0));
// Check crcs section
Crcs crcs;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ IntegerIndexStorage::kCrcsMetadataFileOffset));
// # of elements in sorted_buckets should be 1, so it should have non-zero
- // crc value.
- EXPECT_THAT(crcs.component_crcs.sorted_buckets_crc, Ne(0));
- // Other empty file backed vectors should have 0 crc value.
- EXPECT_THAT(crcs.component_crcs.unsorted_buckets_crc, Eq(0));
- EXPECT_THAT(crcs.component_crcs.flash_index_storage_crc, Eq(0));
+ // all storages crc value.
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0));
EXPECT_THAT(crcs.component_crcs.info_crc,
Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
sizeof(Info)))
@@ -263,7 +268,7 @@ TEST_F(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
// Insert some data.
@@ -276,7 +281,7 @@ TEST_F(IntegerIndexStorageTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
- EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -285,7 +290,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage1,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
// Insert some data.
@@ -308,7 +313,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage2,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
EXPECT_THAT(
Query(storage2.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
@@ -323,7 +328,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -340,7 +345,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
// we should be able to get the same contents.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
EXPECT_THAT(
Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
@@ -356,7 +361,7 @@ TEST_F(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -364,32 +369,32 @@ TEST_F(IntegerIndexStorageTest,
ICING_ASSERT_OK(storage->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
- "/", IntegerIndexStorage::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Crcs crcs;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ IntegerIndexStorage::kCrcsMetadataFileOffset));
// Manually corrupt all_crc
crcs.all_crc += kCorruptedValueOffset;
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kCrcsMetadataFileOffset,
+ &crcs, sizeof(Crcs)));
metadata_sfd.reset();
{
// Attempt to create the integer index storage with metadata containing
// corrupted all_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ storage_or = IntegerIndexStorage::Create(filesystem_, working_path_,
Options(), serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(storage_or.status().error_message(),
- HasSubstr("Invalid all crc for IntegerIndexStorage"));
+ HasSubstr("Invalid all crc"));
}
}
@@ -399,7 +404,7 @@ TEST_F(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -407,41 +412,41 @@ TEST_F(IntegerIndexStorageTest,
ICING_ASSERT_OK(storage->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
- "/", IntegerIndexStorage::kFilePrefix, ".m");
+ const std::string metadata_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m");
ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
ASSERT_TRUE(metadata_sfd.is_valid());
Info info;
ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
- Info::kFileOffset));
+ IntegerIndexStorage::kInfoMetadataFileOffset));
// Modify info, but don't update the checksum. This would be similar to
// corruption of info.
info.num_keys += kCorruptedValueOffset;
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Info::kFileOffset, &info,
- sizeof(Info)));
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(),
+ IntegerIndexStorage::kInfoMetadataFileOffset,
+ &info, sizeof(Info)));
{
// Attempt to create the integer index storage with info that doesn't match
// its checksum and confirm that it fails.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ storage_or = IntegerIndexStorage::Create(filesystem_, working_path_,
Options(), serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(storage_or.status().error_message(),
- HasSubstr("Invalid info crc for IntegerIndexStorage"));
+ HasSubstr("Invalid info crc"));
}
}
TEST_F(IntegerIndexStorageTest,
- InitializeExistingFilesWithWrongSortedBucketsCrcShouldFail) {
+ InitializeExistingFilesWithCorruptedSortedBucketsShouldFail) {
{
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -449,42 +454,45 @@ TEST_F(IntegerIndexStorageTest,
ICING_ASSERT_OK(storage->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
- "/", IntegerIndexStorage::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Corrupt sorted buckets manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ sorted_buckets->ComputeChecksum());
+ ICING_ASSERT_OK(sorted_buckets->Append(Bucket(
+ /*key_lower=*/0, /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ ICING_ASSERT_OK(sorted_buckets->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ sorted_buckets->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt sorted_buckets_crc
- crcs.component_crcs.sorted_buckets_crc += kCorruptedValueOffset;
- crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the integer index storage with manually corrupted
// sorted buckets. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ storage_or = IntegerIndexStorage::Create(filesystem_, working_path_,
Options(), serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- storage_or.status().error_message(),
- HasSubstr("Mismatch crc with IntegerIndexStorage sorted buckets"));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
}
}
TEST_F(IntegerIndexStorageTest,
- InitializeExistingFilesWithWrongUnsortedBucketsCrcShouldFail) {
+ InitializeExistingFilesWithCorruptedUnsortedBucketsShouldFail) {
{
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -492,42 +500,47 @@ TEST_F(IntegerIndexStorageTest,
ICING_ASSERT_OK(storage->PersistToDisk());
}
- const std::string metadata_file_path =
- absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
- "/", IntegerIndexStorage::kFilePrefix, ".m");
- ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
- ASSERT_TRUE(metadata_sfd.is_valid());
-
- Crcs crcs;
- ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
- Crcs::kFileOffset));
+ {
+ // Corrupt unsorted buckets manually.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/sizeof(Bucket) * 100 +
+ FileBackedVector<Bucket>::Header::kHeaderSize));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc,
+ unsorted_buckets->ComputeChecksum());
+ ICING_ASSERT_OK(unsorted_buckets->Append(Bucket(
+ /*key_lower=*/0, /*key_upper=*/std::numeric_limits<int64_t>::max())));
+ ICING_ASSERT_OK(unsorted_buckets->PersistToDisk());
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc,
+ unsorted_buckets->ComputeChecksum());
+ ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+ }
- // Manually corrupt unsorted_buckets_crc
- crcs.component_crcs.unsorted_buckets_crc += kCorruptedValueOffset;
- crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
- ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
- sizeof(Crcs)));
{
// Attempt to create the integer index storage with manually corrupted
// unsorted buckets. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
- storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ storage_or = IntegerIndexStorage::Create(filesystem_, working_path_,
Options(), serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- storage_or.status().error_message(),
- HasSubstr("Mismatch crc with IntegerIndexStorage unsorted buckets"));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid storages crc"));
}
}
-// TODO(b/259744228): add test for corrupted flash_index_storage_crc
+// TODO(b/259744228): add test for corrupted flash_index_storage
TEST_F(IntegerIndexStorageTest, InvalidQuery) {
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ IntegerIndexStorage::Create(filesystem_, working_path_, Options(),
serializer_.get()));
EXPECT_THAT(
storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1),
@@ -546,7 +559,7 @@ TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -599,7 +612,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -658,7 +671,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -699,7 +712,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -722,7 +735,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -775,7 +788,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -831,7 +844,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -881,7 +894,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -931,7 +944,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -1018,7 +1031,7 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -1059,7 +1072,7 @@ TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
@@ -1093,7 +1106,7 @@ TEST_F(IntegerIndexStorageTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
- filesystem_, base_dir_,
+ filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets)),
serializer_.get()));
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
new file mode 100644
index 0000000..4de437e
--- /dev/null
+++ b/icing/index/numeric/integer-index.cc
@@ -0,0 +1,242 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to get the file name of metadata.
+std::string GetMetadataFileName() {
+ return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
+}
+
+// Helper function to get the file path of metadata according to the given
+// working directory.
+std::string GetMetadataFilePath(std::string_view working_path) {
+ return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
+}
+
+// Helper function to get the sub working (directory) path of
+// IntegerIndexStorage according to the given working directory and property
+// path.
+std::string GetPropertyIndexStoragePath(std::string_view working_path,
+ std::string_view property_path) {
+ return absl_ports::StrCat(working_path, "/", property_path);
+}
+
+// Helper function to get all existing property paths by listing all
+// directories.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+GetAllExistingPropertyPaths(const Filesystem& filesystem,
+ const std::string& working_path) {
+ std::vector<std::string> property_paths;
+ if (!filesystem.ListDirectory(working_path.c_str(),
+ /*exclude=*/{GetMetadataFileName()},
+ /*recursive=*/false, &property_paths)) {
+ return absl_ports::InternalError("Failed to list directory");
+ }
+ return property_paths;
+}
+
+libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
+GetPropertyIntegerIndexStorageMap(
+ const Filesystem& filesystem, const std::string& working_path,
+ PostingListIntegerIndexSerializer* posting_list_serializer) {
+ ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
+ GetAllExistingPropertyPaths(filesystem, working_path));
+
+ IntegerIndex::PropertyToStorageMapType property_to_storage_map;
+ for (const std::string& property_path : property_paths) {
+ std::string storage_working_path =
+ GetPropertyIndexStoragePath(working_path, property_path);
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem, storage_working_path,
+ IntegerIndexStorage::Options(),
+ posting_list_serializer));
+ property_to_storage_map.insert(
+ std::make_pair(property_path, std::move(storage)));
+ }
+
+ return property_to_storage_map;
+}
+
+} // namespace
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::Create(const Filesystem& filesystem, std::string working_path) {
+ if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
+ // Discard working_path if metadata file is missing, and reinitialize.
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path, kWorkingPathType));
+ return InitializeNewFiles(filesystem, std::move(working_path));
+ }
+ return InitializeExistingFiles(filesystem, std::move(working_path));
+}
+
+IntegerIndex::~IntegerIndex() {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist integer index to disk while destructing "
+ << working_path_;
+ }
+}
+
+libtextclassifier3::Status IntegerIndex::Reset() {
+ // Step 1: clear property_to_storage_map_.
+ property_to_storage_map_.clear();
+
+ // Step 2: delete all IntegerIndexStorages. It is safe because there is no
+ // active IntegerIndexStorage after clearing the map.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::string> property_paths,
+ GetAllExistingPropertyPaths(filesystem_, working_path_));
+ for (const std::string& property_path : property_paths) {
+ ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
+ filesystem_,
+ GetPropertyIndexStoragePath(working_path_, property_path)));
+ }
+
+ info()->last_added_document_id = kInvalidDocumentId;
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
+ std::string&& working_path) {
+ // Create working directory.
+ if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create directory: ", working_path));
+ }
+
+ // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
+ /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
+
+ // Create instance.
+ auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
+ filesystem, std::move(working_path),
+ std::make_unique<PostingListIntegerIndexSerializer>(),
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ /*property_to_storage_map=*/{}));
+ // Initialize info content by writing mapped memory directly.
+ Info* info_ptr = new_integer_index->info();
+ info_ptr->magic = Info::kMagic;
+ info_ptr->last_added_document_id = kInvalidDocumentId;
+ // Initialize new PersistentStorage. The initial checksums will be computed
+ // and set via InitializeNewStorage.
+ ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
+
+ return new_integer_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path) {
+ // Mmap the content of the crcs and info.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/kMetadataFileSize,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/kMetadataFileSize));
+
+ auto posting_list_serializer =
+ std::make_unique<PostingListIntegerIndexSerializer>();
+
+ // Initialize all existing integer index storages.
+ ICING_ASSIGN_OR_RETURN(
+ PropertyToStorageMapType property_to_storage_map,
+ GetPropertyIntegerIndexStorageMap(filesystem, working_path,
+ posting_list_serializer.get()));
+
+ // Create instance.
+ auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
+ filesystem, std::move(working_path), std::move(posting_list_serializer),
+ std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+ std::move(property_to_storage_map)));
+ // Initialize existing PersistentStorage. Checksums will be validated.
+ ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
+
+ // Validate magic.
+ if (integer_index->info()->magic != Info::kMagic) {
+ return absl_ports::FailedPreconditionError("Incorrect magic value");
+ }
+
+ return integer_index;
+}
+
+libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
+ for (auto& [_, storage] : property_to_storage_map_) {
+ ICING_RETURN_IF_ERROR(storage->PersistToDisk());
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk() {
+ // Changes should have been applied to the underlying file when using
+ // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+ // extra safety step to ensure they are written out.
+ return metadata_mmapped_file_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum() {
+ return info()->ComputeChecksum();
+}
+
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum() {
+ // XOR all crcs of all storages. Since XOR is commutative and associative, the
+ // order doesn't matter.
+ uint32_t storages_checksum = 0;
+ for (auto& [property_path, storage] : property_to_storage_map_) {
+ ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
+ storage_crc.Append(property_path);
+
+ storages_checksum ^= storage_crc.Get();
+ }
+ return Crc32(storages_checksum);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
new file mode 100644
index 0000000..a00d339
--- /dev/null
+++ b/icing/index/numeric/integer-index.h
@@ -0,0 +1,190 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
+#define ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/numeric/integer-index-storage.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// IntegerIndex: a wrapper class for managing IntegerIndexStorage (a lower level
+// persistent storage class for indexing and searching contents of integer type
+// sections in documents) instances for different property paths.
+// We separate indexable integer data from different properties into different
+// storages, and IntegerIndex manages and handles indexable integer data
+// appropriately to their corresponding IntegerIndexStorage instance according
+// to the given property path.
+class IntegerIndex : public NumericIndex<int64_t> {
+ public:
+ // Maps each property path to the IntegerIndexStorage that holds its data.
+ using PropertyToStorageMapType =
+ std::unordered_map<std::string, std::unique_ptr<IntegerIndexStorage>>;
+
+ // Fixed-size header stored in the metadata file right after Crcs (see the
+ // metadata file layout below).
+ struct Info {
+ // The value `magic` is compared against when existing files are loaded.
+ static constexpr int32_t kMagic = 0x238a3dcb;
+
+ int32_t magic;
+ DocumentId last_added_document_id;
+
+ // Returns the checksum of the raw bytes of this struct.
+ Crc32 ComputeChecksum() const {
+ return Crc32(
+ std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+ }
+ } __attribute__((packed));
+ // Pins the size of Info, which is read in place from the memory-mapped
+ // metadata file (see info()).
+ static_assert(sizeof(Info) == 8, "");
+
+ // Metadata file layout: <Crcs><Info>
+ static constexpr int32_t kCrcsMetadataFileOffset = 0;
+ static constexpr int32_t kInfoMetadataFileOffset =
+ static_cast<int32_t>(sizeof(Crcs));
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 20, "");
+
+ // IntegerIndex treats its working path as a directory.
+ static constexpr WorkingPathType kWorkingPathType =
+ WorkingPathType::kDirectory;
+ static constexpr std::string_view kFilePrefix = "integer_index";
+
+ // Creates a new IntegerIndex instance to index integers. If any of the
+ // underlying files is missing, deletes the whole working_path and
+ // (re)initializes with new ones. Otherwise initializes and creates the
+ // instance from the existing files.
+ //
+ // filesystem: Object to make system level calls
+ // working_path: Specifies the working path for PersistentStorage.
+ // IntegerIndex uses working path as working directory and all
+ // related files will be stored under this directory. See
+ // PersistentStorage for more details about the concept of
+ // working_path.
+ //
+ // Returns:
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // - INTERNAL_ERROR on I/O errors.
+ // - Any FileBackedVector/MemoryMappedFile errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
+ const Filesystem& filesystem, std::string working_path);
+
+ ~IntegerIndex() override;
+
+ // TODO(b/249829533): implement these functions and add comments.
+ std::unique_ptr<typename NumericIndex<int64_t>::Editor> Edit(
+ std::string_view property_path, DocumentId document_id,
+ SectionId section_id) override;
+
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ std::string_view property_path, int64_t key_lower,
+ int64_t key_upper) const override;
+
+ // Clears all integer index data.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Reset() override;
+
+ private:
+ // Private constructor; instances are built by the static factory functions
+ // of this class. Takes ownership of the serializer, the memory-mapped
+ // metadata file and the property-to-storage map.
+ explicit IntegerIndex(const Filesystem& filesystem,
+ std::string&& working_path,
+ std::unique_ptr<PostingListIntegerIndexSerializer>
+ posting_list_serializer,
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ PropertyToStorageMapType&& property_to_storage_map)
+ : NumericIndex<int64_t>(filesystem, std::move(working_path),
+ kWorkingPathType),
+ posting_list_serializer_(std::move(posting_list_serializer)),
+ metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ property_to_storage_map_(std::move(property_to_storage_map)) {}
+
+ // NOTE(review): presumably the counterpart of InitializeExistingFiles() used
+ // when the files have to be (re)created - confirm against integer-index.cc.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path);
+
+ // Builds an instance on top of the files already under working_path. Errors
+ // from InitializeExistingStorage() (which validates checksums) are
+ // propagated, and FAILED_PRECONDITION_ERROR is returned if the stored magic
+ // value differs from Info::kMagic.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ InitializeExistingFiles(const Filesystem& filesystem,
+ std::string&& working_path);
+
+ // Flushes contents of all storages to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistStoragesToDisk() override;
+
+ // Flushes contents of metadata file.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistMetadataToDisk() override;
+
+ // Computes and returns Info checksum.
+ //
+ // Returns:
+ // - Crc of the Info on success
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+
+ // Computes and returns all storages checksum. The checksum of every
+ // IntegerIndexStorage in property_to_storage_map_ is extended with its
+ // property path, and the results are combined together by XOR.
+ //
+ // Returns:
+ // - Crc of all storages on success
+ // - INTERNAL_ERROR if any data inconsistency
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+
+ // Accessors below reinterpret sections of the memory-mapped metadata file in
+ // place, at the offsets given by the metadata file layout constants.
+ Crcs& crcs() override {
+ return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ const Crcs& crcs() const override {
+ return *reinterpret_cast<const Crcs*>(metadata_mmapped_file_->region() +
+ kCrcsMetadataFileOffset);
+ }
+
+ Info* info() {
+ return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
+ }
+
+ const Info* info() const {
+ return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
+ }
+
+ std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;
+
+ // Memory-mapped metadata file; its layout is <Crcs><Info>.
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
+
+ // Property path to integer index storage map.
+ PropertyToStorageMapType property_to_storage_map_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_H_
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
index 6798f8d..a9d65d4 100644
--- a/icing/index/numeric/numeric-index.h
+++ b/icing/index/numeric/numeric-index.h
@@ -21,6 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/persistent-storage.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -29,7 +30,7 @@ namespace icing {
namespace lib {
template <typename T>
-class NumericIndex {
+class NumericIndex : public PersistentStorage {
public:
using value_type = T;
@@ -46,9 +47,9 @@ class NumericIndex {
// add these records into numeric index.
class Editor {
public:
- explicit Editor(std::string_view property_name, DocumentId document_id,
+ explicit Editor(std::string_view property_path, DocumentId document_id,
SectionId section_id)
- : property_name_(property_name),
+ : property_path_(property_path),
document_id_(document_id),
section_id_(section_id) {}
@@ -69,7 +70,7 @@ class NumericIndex {
virtual libtextclassifier3::Status IndexAllBufferedKeys() = 0;
protected:
- std::string property_name_;
+ std::string property_path_;
DocumentId document_id_;
SectionId section_id_;
};
@@ -106,7 +107,7 @@ class NumericIndex {
// Returns an Editor instance for adding new records into numeric index for a
// given property, DocumentId and SectionId. See Editor for more details.
- virtual std::unique_ptr<Editor> Edit(std::string_view property_name,
+ virtual std::unique_ptr<Editor> Edit(std::string_view property_path,
DocumentId document_id,
SectionId section_id) = 0;
@@ -121,23 +122,36 @@ class NumericIndex {
//
// Returns:
// - std::unique_ptr<DocHitInfoIterator> on success
- // - NOT_FOUND_ERROR if there is no numeric index for property_name
+ // - NOT_FOUND_ERROR if there is no numeric index for property_path
// - INVALID_ARGUMENT_ERROR if key_lower > key_upper
// - Any other errors, depending on the actual implementation
virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
- GetIterator(std::string_view property_name, T key_lower,
+ GetIterator(std::string_view property_path, T key_lower,
T key_upper) const = 0;
// Clears all files created by the index. Returns OK if all files were
// cleared.
virtual libtextclassifier3::Status Reset() = 0;
- // Syncs all the data and metadata changes to disk.
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on I/O errors
- virtual libtextclassifier3::Status PersistToDisk() = 0;
+ protected:
+ explicit NumericIndex(const Filesystem& filesystem,
+ std::string&& working_path,
+ PersistentStorage::WorkingPathType working_path_type)
+ : PersistentStorage(filesystem, std::move(working_path),
+ working_path_type) {}
+
+ virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0;
+
+ virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum()
+ override = 0;
+
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum()
+ override = 0;
+
+ virtual Crcs& crcs() override = 0;
+ virtual const Crcs& crcs() const override = 0;
};
} // namespace lib
diff --git a/icing/index/numeric/numeric-index_test.cc b/icing/index/numeric/numeric-index_test.cc
index 38769f6..d4ff963 100644
--- a/icing/index/numeric/numeric-index_test.cc
+++ b/icing/index/numeric/numeric-index_test.cc
@@ -23,12 +23,14 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/dummy-numeric-index.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
@@ -37,6 +39,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::NotNull;
constexpr static std::string_view kDefaultTestPropertyName = "test";
@@ -49,16 +52,29 @@ class NumericIndexTest : public ::testing::Test {
using INDEX_IMPL_TYPE = T;
void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_path_ = base_dir_ + "/numeric_index_integer_test";
+
if (std::is_same_v<
INDEX_IMPL_TYPE,
DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>>) {
- numeric_index_ = std::make_unique<
- DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<typename INDEX_IMPL_TYPE::value_type>::Create(
+ filesystem_, working_path_));
}
ASSERT_THAT(numeric_index_, NotNull());
}
+ // Destroys the index under test, then removes the temporary base directory
+ // created in SetUp().
+ void TearDown() override {
+ // NOTE(review): the index is released first, presumably so that nothing is
+ // still using files under base_dir_ when it is deleted - confirm.
+ numeric_index_.reset();
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
void Index(std::string_view property_name, DocumentId document_id,
SectionId section_id,
std::vector<typename INDEX_IMPL_TYPE::value_type> keys) {
@@ -86,6 +102,9 @@ class NumericIndexTest : public ::testing::Test {
return result;
}
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string working_path_;
std::unique_ptr<NumericIndex<typename INDEX_IMPL_TYPE::value_type>>
numeric_index_;
};
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index 9b1db7e..9a5e299 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -114,7 +114,7 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
// merge.
if ((status.ok() || absl_ports::IsResourceExhausted(status)) &&
index_.WantsMerge()) {
- ICING_LOG(ERROR) << "Merging the index at docid " << document_id << ".";
+ ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
libtextclassifier3::Status merge_status = index_.Merge();
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index 51f3106..f2a33e0 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -17,7 +17,6 @@
#include <string>
#include <utility>
-#include <google/protobuf/message_lite.h>
#include "icing/icing-search-engine.h"
#include "icing/jni/jni-cache.h"
#include "icing/jni/scoped-primitive-array-critical.h"
@@ -33,6 +32,7 @@
#include "icing/proto/usage.pb.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include <google/protobuf/message_lite.h>
namespace {
@@ -46,8 +46,8 @@ bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
return protobuf->ParseFromArray(scoped_array.data(), scoped_array.size());
}
-jbyteArray SerializeProtoToJniByteArray(
- JNIEnv* env, const google::protobuf::MessageLite& protobuf) {
+jbyteArray SerializeProtoToJniByteArray(JNIEnv* env,
+ const google::protobuf::MessageLite& protobuf) {
int size = protobuf.ByteSizeLong();
jbyteArray ret = env->NewByteArray(size);
if (ret == nullptr) {
diff --git a/icing/join/join-children-fetcher.cc b/icing/join/join-children-fetcher.cc
new file mode 100644
index 0000000..c6d1b97
--- /dev/null
+++ b/icing/join/join-children-fetcher.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-children-fetcher.h"
+
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>>
+JoinChildrenFetcher::GetChildren(DocumentId parent_doc_id) const {
+  // Only qualified-id joins are backed by a lookup map; every other parent
+  // property expression is rejected up front.
+  if (join_spec_.parent_property_expression() != kQualifiedIdExpr) {
+    // TODO(b/256022027): So far we only support kQualifiedIdExpr for
+    // parent_property_expression, we could support more.
+    return absl_ports::UnimplementedError(absl_ports::StrCat(
+        "Parent property expression must be ", kQualifiedIdExpr));
+  }
+
+  const auto children_itr = map_joinable_qualified_id_.find(parent_doc_id);
+  if (children_itr == map_joinable_qualified_id_.end()) {
+    // A parent with no recorded children yields an empty result, not an error.
+    return std::vector<ScoredDocumentHit>();
+  }
+  return children_itr->second;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-children-fetcher.h b/icing/join/join-children-fetcher.h
new file mode 100644
index 0000000..5f799b8
--- /dev/null
+++ b/icing/join/join-children-fetcher.h
@@ -0,0 +1,73 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
+#define ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
+
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// A class that provides the GetChildren method for joins to fetch all children
+// documents given a parent document id.
+//
+// Internally, the class maintains a map for each joinable value type that
+// groups children according to the joinable values. Currently we only support
+// QUALIFIED_ID joining, in which the joinable value type is document id.
+class JoinChildrenFetcher {
+ public:
+ // join_spec is stored by reference (not copied), so it must outlive this
+ // object. map_joinable_qualified_id is moved into the fetcher.
+ explicit JoinChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>&&
+ map_joinable_qualified_id)
+ : join_spec_(join_spec),
+ map_joinable_qualified_id_(std::move(map_joinable_qualified_id)) {}
+
+ // Get a vector of children ScoredDocumentHit by parent document id.
+ //
+ // TODO(b/256022027): Implement property value joins with types of string and
+ // int. In these cases, GetChildren should look up joinable cache to fetch
+ // joinable property value of the given parent_doc_id according to
+ // join_spec_.parent_property_expression, and then fetch children by the
+ // corresponding map in this class using the joinable property value.
+ //
+ // Returns:
+ // The vector of results on success. The vector is empty if the map has no
+ // entry for parent_doc_id.
+ // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not
+ // supported.
+ libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>> GetChildren(
+ DocumentId parent_doc_id) const;
+
+ private:
+ // The only parent property expression GetChildren currently accepts.
+ // NOTE(review): JoinProcessor exposes a kQualifiedIdExpr as well; presumably
+ // the two values must stay in sync - confirm.
+ static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()";
+
+ const JoinSpecProto& join_spec_; // Does not own!
+
+ // The map that groups children by qualified id used to support QualifiedId
+ // joining. The joining type is document id.
+ std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+ map_joinable_qualified_id_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_JOIN_JOIN_CHILDREN_FETCHER_H_
diff --git a/icing/join/join-children-fetcher_test.cc b/icing/join/join-children-fetcher_test.cc
new file mode 100644
index 0000000..75e9a14
--- /dev/null
+++ b/icing/join/join-children-fetcher_test.cc
@@ -0,0 +1,82 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/join-children-fetcher.h"
+
+#include <unordered_map>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/join/join-processor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+TEST(JoinChildrenFetcherTest, FetchQualifiedIdJoinChildren) {
+  // A qualified-id join spec: the only kind the fetcher serves.
+  JoinSpecProto qualified_id_spec;
+  qualified_id_spec.set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  qualified_id_spec.set_child_property_expression("sender");
+
+  // Two children registered, in this order, under a single parent.
+  const DocumentId parent_doc_id = 0;
+  ScoredDocumentHit first_child(/*document_id=*/1, kSectionIdMaskNone,
+                                /*score=*/1.0);
+  ScoredDocumentHit second_child(/*document_id=*/2, kSectionIdMaskNone,
+                                 /*score=*/2.0);
+  std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+      children_by_parent;
+  children_by_parent.emplace(
+      parent_doc_id,
+      std::vector<ScoredDocumentHit>{first_child, second_child});
+
+  JoinChildrenFetcher fetcher(qualified_id_spec,
+                              std::move(children_by_parent));
+  ICING_ASSERT_OK_AND_ASSIGN(std::vector<ScoredDocumentHit> fetched_children,
+                             fetcher.GetChildren(parent_doc_id));
+  // Children come back exactly as they were stored.
+  EXPECT_THAT(fetched_children,
+              ElementsAre(EqualsScoredDocumentHit(first_child),
+                          EqualsScoredDocumentHit(second_child)));
+}
+
+TEST(JoinChildrenFetcherTest, FetchJoinEmptyChildren) {
+  JoinSpecProto qualified_id_spec;
+  qualified_id_spec.set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  qualified_id_spec.set_child_property_expression("sender");
+
+  // The fetcher is given no children at all.
+  JoinChildrenFetcher fetcher(qualified_id_spec,
+                              /*map_joinable_qualified_id=*/{});
+
+  // Looking up any parent must succeed and produce an empty vector.
+  const DocumentId parent_doc_id = 0;
+  ICING_ASSERT_OK_AND_ASSIGN(std::vector<ScoredDocumentHit> fetched_children,
+                             fetcher.GetChildren(parent_doc_id));
+  EXPECT_THAT(fetched_children, IsEmpty());
+}
+
+TEST(JoinChildrenFetcherTest, UnsupportedJoin) {
+  // "name" is not the qualified-id expression, so lookups must be rejected.
+  JoinSpecProto unsupported_spec;
+  unsupported_spec.set_parent_property_expression("name");
+  unsupported_spec.set_child_property_expression("sender");
+
+  JoinChildrenFetcher fetcher(unsupported_spec,
+                              /*map_joinable_qualified_id=*/{});
+  EXPECT_THAT(fetcher.GetChildren(/*parent_doc_id=*/0),
+              StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
index 7700397..ab32850 100644
--- a/icing/join/join-processor.cc
+++ b/icing/join/join-processor.cc
@@ -34,11 +34,17 @@
namespace icing {
namespace lib {
-libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
-JoinProcessor::Join(
+libtextclassifier3::StatusOr<JoinChildrenFetcher>
+JoinProcessor::GetChildrenFetcher(
const JoinSpecProto& join_spec,
- std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+ if (join_spec.parent_property_expression() != kQualifiedIdExpr) {
+ // TODO(b/256022027): So far we only support kQualifiedIdExpr for
+ // parent_property_expression, we could support more.
+ return absl_ports::UnimplementedError(absl_ports::StrCat(
+ "Parent property expression must be ", kQualifiedIdExpr));
+ }
+
std::sort(
child_scored_document_hits.begin(), child_scored_document_hits.end(),
ScoredDocumentHitComparator(
@@ -59,7 +65,7 @@ JoinProcessor::Join(
// ScoredDocumentHits refer to. The values in this map are vectors of child
// ScoredDocumentHits that refer to a parent DocumentId.
std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
- parent_id_to_child_map;
+ map_joinable_qualified_id;
for (const ScoredDocumentHit& child : child_scored_document_hits) {
std::string property_content = FetchPropertyExpressionValue(
child.document_id(), join_spec.child_property_expression());
@@ -84,14 +90,21 @@ JoinProcessor::Join(
DocumentId parent_doc_id = std::move(parent_doc_id_or).ValueOrDie();
// Since we've already sorted child_scored_document_hits, just simply omit
- // if the parent_id_to_child_map[parent_doc_id].size() has reached max
+ // if the map_joinable_qualified_id[parent_doc_id].size() has reached max
// joined child count.
- if (parent_id_to_child_map[parent_doc_id].size() <
+ if (map_joinable_qualified_id[parent_doc_id].size() <
join_spec.max_joined_child_count()) {
- parent_id_to_child_map[parent_doc_id].push_back(child);
+ map_joinable_qualified_id[parent_doc_id].push_back(child);
}
}
+ return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id));
+}
+libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
+JoinProcessor::Join(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
+ const JoinChildrenFetcher& join_children_fetcher) {
std::unique_ptr<AggregationScorer> aggregation_scorer =
AggregationScorer::Create(join_spec);
@@ -100,23 +113,11 @@ JoinProcessor::Join(
// Step 2: iterate through all parent documentIds and construct
// JoinedScoredDocumentHit for each by looking up
- // parent_id_to_child_map.
+ // join_children_fetcher.
for (ScoredDocumentHit& parent : parent_scored_document_hits) {
- DocumentId parent_doc_id = kInvalidDocumentId;
- if (join_spec.parent_property_expression() == kQualifiedIdExpr) {
- parent_doc_id = parent.document_id();
- } else {
- // TODO(b/256022027): So far we only support kQualifiedIdExpr for
- // parent_property_expression, we could support more.
- return absl_ports::UnimplementedError(absl_ports::StrCat(
- "Parent property expression must be ", kQualifiedIdExpr));
- }
-
- std::vector<ScoredDocumentHit> children;
- if (auto iter = parent_id_to_child_map.find(parent_doc_id);
- iter != parent_id_to_child_map.end()) {
- children = std::move(iter->second);
- }
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<ScoredDocumentHit> children,
+ join_children_fetcher.GetChildren(parent.document_id()));
double final_score = aggregation_scorer->GetScore(parent, children);
joined_scored_document_hits.emplace_back(final_score, std::move(parent),
diff --git a/icing/join/join-processor.h b/icing/join/join-processor.h
index 65c9e5f..9d5ee11 100644
--- a/icing/join/join-processor.h
+++ b/icing/join/join-processor.h
@@ -20,6 +20,7 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/proto/search.pb.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-store.h"
@@ -34,10 +35,21 @@ class JoinProcessor {
explicit JoinProcessor(const DocumentStore* doc_store)
: doc_store_(doc_store) {}
+ // Get a JoinChildrenFetcher used to fetch all children documents by a parent
+ // document id.
+ //
+ // Returns:
+ // A JoinChildrenFetcher instance on success.
+ // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not
+ // supported.
+ libtextclassifier3::StatusOr<JoinChildrenFetcher> GetChildrenFetcher(
+ const JoinSpecProto& join_spec,
+ std::vector<ScoredDocumentHit>&& child_scored_document_hits);
+
libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
const JoinSpecProto& join_spec,
std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
- std::vector<ScoredDocumentHit>&& child_scored_document_hits);
+ const JoinChildrenFetcher& join_children_fetcher);
private:
// Loads a document and uses a property expression to fetch the value of the
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index 70eaf3f..00f2b1c 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -88,6 +88,20 @@ class JoinProcessorTest : public ::testing::Test {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+  // Test helper that runs both join phases: it first builds a
+  // JoinChildrenFetcher from the child hits, then joins the parent hits
+  // against it. An error from the first phase is returned as-is.
+  libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
+      const JoinSpecProto& join_spec,
+      std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
+      std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+    JoinProcessor processor(doc_store_.get());
+
+    ICING_ASSIGN_OR_RETURN(
+        JoinChildrenFetcher children_fetcher,
+        processor.GetChildrenFetcher(join_spec,
+                                     std::move(child_scored_document_hits)));
+
+    return processor.Join(join_spec, std::move(parent_scored_document_hits),
+                          children_fetcher);
+  }
+
+
Filesystem filesystem_;
std::string test_dir_;
std::unique_ptr<SchemaStore> schema_store_;
@@ -165,11 +179,10 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
EXPECT_THAT(
joined_result_document_hits,
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
@@ -232,11 +245,10 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Since Email2 doesn't have "sender" property, it should be ignored.
EXPECT_THAT(
joined_result_document_hits,
@@ -310,11 +322,10 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Email 2 and email 3 (document id 3 and 4) contain invalid qualified ids.
// Join processor should ignore them.
EXPECT_THAT(joined_result_document_hits,
@@ -373,11 +384,10 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Person1 has no child documents, but left join should also include it.
EXPECT_THAT(
joined_result_document_hits,
@@ -452,11 +462,10 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
// Child documents should be sorted according to the (nested) ranking
// strategy.
EXPECT_THAT(
@@ -548,11 +557,10 @@ TEST_F(JoinProcessorTest,
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
 // Since we set max_joined_child_count as 2 and use DESC as the (nested)
// ranking strategy, parent document with # of child documents more than 2
// should only keep 2 child documents with higher scores and the rest should
@@ -601,11 +609,10 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
ScoringSpecProto::Order::DESC);
- JoinProcessor join_processor(doc_store_.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- join_processor.Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
+ Join(join_spec, std::move(parent_scored_document_hits),
+ std::move(child_scored_document_hits)));
EXPECT_THAT(joined_result_document_hits,
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
/*final_score=*/1.0,
diff --git a/icing/portable/equals-proto.h b/icing/portable/equals-proto.h
index 6a600be..8bb835e 100644
--- a/icing/portable/equals-proto.h
+++ b/icing/portable/equals-proto.h
@@ -20,8 +20,8 @@
#ifndef ICING_PORTABLE_EQUALS_PROTO_H_
#define ICING_PORTABLE_EQUALS_PROTO_H_
+#include "gmock/gmock.h" // IWYU pragma: export
#include <google/protobuf/message_lite.h> // IWYU pragma: export
-#include "gmock/gmock.h" // IWYU pragma: export
#if defined(__ANDROID__) || defined(__APPLE__)
namespace icing {
diff --git a/icing/portable/gzip_stream.h b/icing/portable/gzip_stream.h
index 602093f..8008a55 100644
--- a/icing/portable/gzip_stream.h
+++ b/icing/portable/gzip_stream.h
@@ -27,8 +27,8 @@
#ifndef GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
#define GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include "icing/portable/zlib.h"
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
namespace icing {
namespace lib {
@@ -50,9 +50,8 @@ class GzipInputStream : public google::protobuf::io::ZeroCopyInputStream {
};
// buffer_size and format may be -1 for default of 64kB and GZIP format
- explicit GzipInputStream(
- google::protobuf::io::ZeroCopyInputStream* sub_stream,
- Format format = AUTO, int buffer_size = -1);
+ explicit GzipInputStream(google::protobuf::io::ZeroCopyInputStream* sub_stream,
+ Format format = AUTO, int buffer_size = -1);
virtual ~GzipInputStream();
// Return last error message or NULL if no error.
@@ -113,13 +112,11 @@ class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream {
};
// Create a GzipOutputStream with default options.
- explicit GzipOutputStream(
- google::protobuf::io::ZeroCopyOutputStream* sub_stream);
+ explicit GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream);
// Create a GzipOutputStream with the given options.
- GzipOutputStream(
- google::protobuf::io::ZeroCopyOutputStream* sub_stream,
- const Options& options);
+ GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream,
+ const Options& options);
virtual ~GzipOutputStream();
@@ -164,9 +161,8 @@ class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream {
size_t input_buffer_length_;
// Shared constructor code.
- void Init(
- google::protobuf::io::ZeroCopyOutputStream* sub_stream,
- const Options& options);
+ void Init(google::protobuf::io::ZeroCopyOutputStream* sub_stream,
+ const Options& options);
// Do some compression.
// Takes zlib flush mode.
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
index 924b41f..659ad7b 100644
--- a/icing/query/advanced_query_parser/query-visitor.cc
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -105,11 +105,44 @@ libtextclassifier3::StatusOr<Int64Range> GetInt64Range(
} // namespace
-libtextclassifier3::StatusOr<int64_t> QueryVisitor::RetrieveIntValue() {
- if (pending_values_.empty() || !pending_values_.top().holds_text()) {
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+QueryVisitor::CreateTermIterator(const std::string& term) {
+ if (!processing_not_) {
+ // 1. Add term to property_query_terms_map_
+ auto property_restrict_or = GetPropertyRestrict();
+ if (property_restrict_or.ok()) {
+ std::string property_restrict =
+ std::move(property_restrict_or).ValueOrDie();
+ property_query_terms_map_[std::move(property_restrict)].insert(term);
+ } else {
+ ICING_LOG(DBG) << "Unsatisfiable property restrict, "
+ << property_restrict_or.status().error_message();
+ }
+
+ // 2. If needed add term iterator to query_term_iterators_ map.
+ if (needs_term_frequency_info_) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<DocHitInfoIterator> term_iterator,
+ index_.GetIterator(term, kSectionIdMaskAll, match_type_,
+ needs_term_frequency_info_));
+ query_term_iterators_[term] = std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(term_iterator), &document_store_, &schema_store_,
+ filter_options_);
+ }
+ }
+
+ // 3. Create and return the iterator for the term itself.
+ // TODO(b/208654892): Add support for the prefix operator (*).
+ return index_.GetIterator(term, kSectionIdMaskAll, match_type_,
+ needs_term_frequency_info_);
+}
+
+libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
+ if (pending_values_.empty() ||
+ pending_values_.top().data_type() != PendingValue::DataType::kText) {
return absl_ports::InvalidArgumentError("Unable to retrieve int value.");
}
- std::string& value = pending_values_.top().text;
+ const std::string& value = pending_values_.top().term();
char* value_end;
int64_t int_value = std::strtoll(value.c_str(), &value_end, /*base=*/10);
if (value_end != value.c_str() + value.length()) {
@@ -120,54 +153,71 @@ libtextclassifier3::StatusOr<int64_t> QueryVisitor::RetrieveIntValue() {
return int_value;
}
-libtextclassifier3::StatusOr<std::string> QueryVisitor::RetrieveStringValue() {
- if (pending_values_.empty() || !pending_values_.top().holds_text()) {
- return absl_ports::InvalidArgumentError("Unable to retrieve string value.");
+libtextclassifier3::StatusOr<std::string>
+QueryVisitor::PopPendingStringValue() {
+ if (pending_values_.empty() ||
+ pending_values_.top().data_type() != PendingValue::DataType::kString) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve string value.");
}
- std::string string_value = std::move(pending_values_.top().text);
+ std::string string_value = std::move(pending_values_.top().term());
pending_values_.pop();
return string_value;
}
+libtextclassifier3::StatusOr<std::string> QueryVisitor::PopPendingTextValue() {
+ if (pending_values_.empty() ||
+ pending_values_.top().data_type() != PendingValue::DataType::kText) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve text value.");
+ }
+ std::string text_value = std::move(pending_values_.top().term());
+ pending_values_.pop();
+ return text_value;
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
-QueryVisitor::RetrieveIterator() {
- if (pending_values_.top().holds_iterator()) {
+QueryVisitor::PopPendingIterator() {
+ if (pending_values_.empty() || pending_values_.top().is_placeholder()) {
+ return absl_ports::InvalidArgumentError("Unable to retrieve iterator.");
+ }
+ if (pending_values_.top().data_type() ==
+ PendingValue::DataType::kDocIterator) {
std::unique_ptr<DocHitInfoIterator> iterator =
- std::move(pending_values_.top().iterator);
+ std::move(pending_values_.top().iterator());
pending_values_.pop();
return iterator;
- }
- ICING_ASSIGN_OR_RETURN(std::string value, RetrieveStringValue());
- if (!processing_not_ && needs_term_frequency_info_) {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<DocHitInfoIterator> term_iterator,
- index_.GetIterator(value, kSectionIdMaskAll, match_type_,
- needs_term_frequency_info_));
- query_term_iterators_[value] = std::make_unique<DocHitInfoIteratorFilter>(
- std::move(term_iterator), &document_store_, &schema_store_,
- filter_options_);
- }
- if (!processing_not_) {
- auto property_restrict_or = GetPropertyRestrict();
- if (property_restrict_or.ok()) {
- property_query_terms_map_[std::move(property_restrict_or).ValueOrDie()]
- .insert(value);
- } else {
- ICING_LOG(DBG) << "Unsatisfiable property restrict, "
- << property_restrict_or.status().error_message();
+ } else if (pending_values_.top().data_type() ==
+ PendingValue::DataType::kString) {
+ features_.insert(kVerbatimSearchFeature);
+ ICING_ASSIGN_OR_RETURN(std::string value, PopPendingStringValue());
+ return CreateTermIterator(std::move(value));
+ } else {
+ ICING_ASSIGN_OR_RETURN(std::string value, PopPendingTextValue());
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> token_itr,
+ tokenizer_.Tokenize(value));
+ std::string normalized_term;
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ while (token_itr->Advance()) {
+ for (const Token& token : token_itr->GetTokens()) {
+ normalized_term = normalizer_.NormalizeTerm(token.text);
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iterator,
+ CreateTermIterator(std::move(normalized_term)));
+ iterators.push_back(std::move(iterator));
+ }
}
+
+ // Finally, create an And Iterator. If there's only a single term here, then
+ // it will just return that term iterator. Otherwise, segmented text is
+ // treated as a group of terms AND'd together.
+ return CreateAndIterator(std::move(iterators));
}
- // Make it into a term iterator.
- return index_.GetIterator(value, kSectionIdMaskAll, match_type_,
- needs_term_frequency_info_);
}
libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
-QueryVisitor::RetrieveIterators() {
+QueryVisitor::PopAllPendingIterators() {
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> itr,
- RetrieveIterator());
+ PopPendingIterator());
iterators.push_back(std::move(itr));
}
if (pending_values_.empty()) {
@@ -185,8 +235,8 @@ QueryVisitor::ProcessNumericComparator(const NaryOperatorNode* node) {
// 1. The children should have been processed and added their outputs to
// pending_values_. Time to process them.
// The first two pending values should be the int value and the property.
- ICING_ASSIGN_OR_RETURN(int64_t int_value, RetrieveIntValue());
- ICING_ASSIGN_OR_RETURN(std::string property, RetrieveStringValue());
+ ICING_ASSIGN_OR_RETURN(int64_t int_value, PopPendingIntValue());
+ ICING_ASSIGN_OR_RETURN(std::string property, PopPendingTextValue());
// 2. Create the iterator.
ICING_ASSIGN_OR_RETURN(Int64Range range,
@@ -207,7 +257,7 @@ libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
QueryVisitor::ProcessAndOperator(const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
- RetrieveIterators());
+ PopAllPendingIterators());
return PendingValue(CreateAndIterator(std::move(iterators)));
}
@@ -215,23 +265,18 @@ libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
QueryVisitor::ProcessOrOperator(const NaryOperatorNode* node) {
ICING_ASSIGN_OR_RETURN(
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators,
- RetrieveIterators());
+ PopAllPendingIterators());
return PendingValue(CreateOrIterator(std::move(iterators)));
}
libtextclassifier3::StatusOr<QueryVisitor::PendingValue>
QueryVisitor::ProcessHasOperator(const NaryOperatorNode* node) {
- // 1. The children should have been processed and added their outputs to
+ // The children should have been processed and added their outputs to
// pending_values_. Time to process them.
// The first two pending values should be the delegate and the property.
ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> delegate,
- RetrieveIterator());
- // TODO(b/208654892): The HAS operator need to be able to differentiate
- // between values that came from STRING nodes and those that came from
- // members. members should be allowed as the left operator to HAS, but STRINGs
- // should not be. IOW, `"prop1":foo` should not be treated equivalently to
- // `prop1:foo`
- ICING_ASSIGN_OR_RETURN(std::string property, RetrieveStringValue());
+ PopPendingIterator());
+ ICING_ASSIGN_OR_RETURN(std::string property, PopPendingTextValue());
return PendingValue(std::make_unique<DocHitInfoIteratorSectionRestrict>(
std::move(delegate), &document_store_, &schema_store_,
std::move(property)));
@@ -259,21 +304,22 @@ void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) {
}
void QueryVisitor::VisitString(const StringNode* node) {
+ // A STRING node can only be a term. Push it; the iterator is made on pop.
auto escaped_string_or = EscapeStringValue(node->value());
if (!escaped_string_or.ok()) {
pending_error_ = std::move(escaped_string_or).status();
return;
}
- features_.insert(kVerbatimSearchFeature);
- std::string escaped_string = std::move(escaped_string_or).ValueOrDie();
- pending_values_.push(PendingValue(std::move(escaped_string)));
+ pending_values_.push(PendingValue::CreateStringPendingValue(
+ std::move(escaped_string_or).ValueOrDie()));
}
void QueryVisitor::VisitText(const TextNode* node) {
- // TODO(b/208654892): Add support for 1. segmentation and 2. the prefix
- // prefix operator (*).
- std::string normalized_text = normalizer_.NormalizeTerm(node->value());
- pending_values_.push(PendingValue(std::move(normalized_text)));
+ // TEXT nodes could either be a term (and will become DocHitInfoIteratorTerm)
+ // or a property name. As such, we just push the TEXT value into pending
+ // values and determine which it is at a later point.
+ pending_values_.push(
+ PendingValue::CreateTextPendingValue(node->value()));
}
void QueryVisitor::VisitMember(const MemberNode* node) {
@@ -290,11 +336,11 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
// 3. The children should have been processed and added their outputs to
// pending_values_. Time to process them.
- std::string member = std::move(pending_values_.top().text);
+ std::string member = std::move(pending_values_.top().term());
pending_values_.pop();
while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) {
- member = absl_ports::StrCat(pending_values_.top().text, kPropertySeparator,
- member);
+ member = absl_ports::StrCat(pending_values_.top().term(),
+ kPropertySeparator, member);
pending_values_.pop();
}
@@ -307,7 +353,7 @@ void QueryVisitor::VisitMember(const MemberNode* node) {
}
pending_values_.pop();
- pending_values_.push(PendingValue(std::move(member)));
+ pending_values_.push(PendingValue::CreateTextPendingValue(std::move(member)));
}
void QueryVisitor::VisitFunction(const FunctionNode* node) {
@@ -347,7 +393,7 @@ void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) {
}
// 3. Retrieve the delegate iterator
- auto iterator_or = RetrieveIterator();
+ auto iterator_or = PopPendingIterator();
if (!iterator_or.ok()) {
pending_error_ = std::move(iterator_or).status();
return;
@@ -389,12 +435,12 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) {
return;
}
if (processing_has && !processing_not_ && i == 0) {
- if (!pending_values_.top().holds_text()) {
+ if (pending_values_.top().data_type() != PendingValue::DataType::kText) {
pending_error_ = absl_ports::InvalidArgumentError(
"Expected property before ':' operator.");
return;
}
- pending_property_restricts_.push_back(pending_values_.top().text);
+ pending_property_restricts_.push_back(pending_values_.top().term());
}
}
@@ -437,7 +483,7 @@ libtextclassifier3::StatusOr<QueryResults> QueryVisitor::ConsumeResults() && {
return absl_ports::InvalidArgumentError(
"Visitor does not contain a single root iterator.");
}
- auto iterator_or = RetrieveIterator();
+ auto iterator_or = PopPendingIterator();
if (!iterator_or.ok()) {
return std::move(iterator_or).status();
}
diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h
index 9e68572..414f1b9 100644
--- a/icing/query/advanced_query_parser/query-visitor.h
+++ b/icing/query/advanced_query_parser/query-visitor.h
@@ -21,7 +21,6 @@
#include <string>
#include <unordered_set>
-#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -31,6 +30,7 @@
#include "icing/query/query-results.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
+#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
namespace icing {
@@ -40,19 +40,18 @@ namespace lib {
// the parser.
class QueryVisitor : public AbstractSyntaxTreeVisitor {
public:
- explicit QueryVisitor(Index* index,
- const NumericIndex<int64_t>* numeric_index,
- const DocumentStore* document_store,
- const SchemaStore* schema_store,
- const Normalizer* normalizer,
- DocHitInfoIteratorFilter::Options filter_options,
- TermMatchType::Code match_type,
- bool needs_term_frequency_info)
+ explicit QueryVisitor(
+ Index* index, const NumericIndex<int64_t>* numeric_index,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const Normalizer* normalizer, const Tokenizer* tokenizer,
+ DocHitInfoIteratorFilter::Options filter_options,
+ TermMatchType::Code match_type, bool needs_term_frequency_info)
: index_(*index),
numeric_index_(*numeric_index),
document_store_(*document_store),
schema_store_(*schema_store),
normalizer_(*normalizer),
+ tokenizer_(*tokenizer),
filter_options_(std::move(filter_options)),
match_type_(match_type),
needs_term_frequency_info_(needs_term_frequency_info),
@@ -74,41 +73,85 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
private:
// A holder for intermediate results when processing child nodes.
- struct PendingValue {
- PendingValue() = default;
+ class PendingValue {
+ public:
+ enum class DataType {
+ kNone,
+ // Values of type STRING will eventually be converted to a
+ // DocHitInfoIterator further upstream.
+ kString,
+
+ // Values of type TEXT may be consumed as properties, numbers or converted
+ // to DocHitInfoIterators further upstream.
+ kText,
+ kDocIterator,
+ };
+
+ static PendingValue CreateStringPendingValue(std::string str) {
+ return PendingValue(std::move(str), DataType::kString);
+ }
+
+ static PendingValue CreateTextPendingValue(std::string text) {
+ return PendingValue(std::move(text), DataType::kText);
+ }
+
+ PendingValue() : data_type_(DataType::kNone) {}
explicit PendingValue(std::unique_ptr<DocHitInfoIterator> iterator)
- : iterator(std::move(iterator)) {}
-
- explicit PendingValue(std::string text) : text(std::move(text)) {}
+ : iterator_(std::move(iterator)), data_type_(DataType::kDocIterator) {}
// Placeholder is used to indicate where the children of a particular node
// begin.
- bool is_placeholder() const { return iterator == nullptr && text.empty(); }
+ bool is_placeholder() const { return data_type_ == DataType::kNone; }
+
+ DataType data_type() const { return data_type_; }
+
+ std::unique_ptr<DocHitInfoIterator>& iterator() { return iterator_; }
+ const std::unique_ptr<DocHitInfoIterator>& iterator() const {
+ return iterator_;
+ }
- bool holds_text() const { return iterator == nullptr && !text.empty(); }
+ std::string& term() { return term_; }
+ const std::string& term() const { return term_; }
- bool holds_iterator() const { return iterator != nullptr && text.empty(); }
+ private:
+ explicit PendingValue(std::string term, DataType data_type)
+ : term_(std::move(term)), data_type_(data_type) {}
- std::unique_ptr<DocHitInfoIterator> iterator;
- std::string text;
+ std::unique_ptr<DocHitInfoIterator> iterator_;
+ std::string term_;
+ DataType data_type_;
};
bool has_pending_error() const { return !pending_error_.ok(); }
+ // Creates a DocHitInfoIterator reflecting the provided term. Also populates
+ // property_query_terms_map_ and query_term_iterators_ as appropriate.
+ // Returns:
+ // - On success, a DocHitInfoIterator for the provided term
+ // - INVALID_ARGUMENT if unable to create an iterator for the term.
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+ CreateTermIterator(const std::string& term);
+
// Processes the PendingValue at the top of pending_values_, parses it into a
// int64_t and pops the top.
// Returns:
// - On success, the int value stored in the text at the top
// - INVALID_ARGUMENT if pending_values_ is empty, doesn't hold a text or
// can't be parsed as an int.
- libtextclassifier3::StatusOr<int64_t> RetrieveIntValue();
+ libtextclassifier3::StatusOr<int64_t> PopPendingIntValue();
+
+ // Processes the PendingValue at the top of pending_values_ and pops the top.
+ // Returns:
+ // - On success, the string value stored in the text at the top
+ // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a string.
+ libtextclassifier3::StatusOr<std::string> PopPendingStringValue();
// Processes the PendingValue at the top of pending_values_ and pops the top.
// Returns:
// - On success, the string value stored in the text at the top
// - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a text.
- libtextclassifier3::StatusOr<std::string> RetrieveStringValue();
+ libtextclassifier3::StatusOr<std::string> PopPendingTextValue();
// Processes the PendingValue at the top of pending_values_ and pops the top.
// Returns:
@@ -116,7 +159,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
// - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an
// iterator for the term.
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
- RetrieveIterator();
+ PopPendingIterator();
// Processes all PendingValues at the top of pending_values_ until the first
// placeholder is encountered.
@@ -126,7 +169,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
// - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an
// iterator for any of the terms at the top of pending_values_
libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DocHitInfoIterator>>>
- RetrieveIterators();
+ PopAllPendingIterators();
// Processes the NumericComparator represented by node. This must be called
// *after* this node's children have been visited. The PendingValues added by
@@ -193,6 +236,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor {
const DocumentStore& document_store_; // Does not own!
const SchemaStore& schema_store_; // Does not own!
const Normalizer& normalizer_; // Does not own!
+ const Tokenizer& tokenizer_; // Does not own!
DocHitInfoIteratorFilter::Options filter_options_;
TermMatchType::Code match_type_;
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index f15bd7f..1577a3f 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -27,6 +27,7 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/dummy-numeric-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
@@ -36,10 +37,16 @@
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -73,6 +80,7 @@ class QueryVisitorTest : public ::testing::Test {
void SetUp() override {
test_dir_ = GetTestTempDir() + "/icing";
index_dir_ = test_dir_ + "/index";
+ numeric_index_dir_ = test_dir_ + "/numeric_index";
store_dir_ = test_dir_ + "/store";
schema_store_dir_ = test_dir_ + "/schema_store";
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
@@ -80,6 +88,8 @@ class QueryVisitorTest : public ::testing::Test {
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+ jni_cache_ = GetTestJniCache();
+
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
// If we've specified using the reverse-JNI method for segmentation (i.e.
// not ICU), then we won't have the ICU data file included to set up.
@@ -107,10 +117,23 @@ class QueryVisitorTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
- numeric_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/1000));
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_US, jni_cache_.get());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(segmenter_options));
+
+ ICING_ASSERT_OK_AND_ASSIGN(tokenizer_,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter_.get()));
}
libtextclassifier3::StatusOr<std::unique_ptr<Node>> ParseQueryHelper(
@@ -126,6 +149,7 @@ class QueryVisitorTest : public ::testing::Test {
IcingFilesystem icing_filesystem_;
std::string test_dir_;
std::string index_dir_;
+ std::string numeric_index_dir_;
std::string schema_store_dir_;
std::string store_dir_;
Clock clock_;
@@ -134,6 +158,9 @@ class QueryVisitorTest : public ::testing::Test {
std::unique_ptr<Index> index_;
std::unique_ptr<DummyNumericIndex<int64_t>> numeric_index_;
std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<Tokenizer> tokenizer_;
+ std::unique_ptr<const JniCache> jni_cache_;
};
TEST_F(QueryVisitorTest, SimpleLessThan) {
@@ -157,7 +184,7 @@ TEST_F(QueryVisitorTest, SimpleLessThan) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -194,7 +221,7 @@ TEST_F(QueryVisitorTest, SimpleLessThanEq) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -231,7 +258,7 @@ TEST_F(QueryVisitorTest, SimpleEqual) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -268,7 +295,7 @@ TEST_F(QueryVisitorTest, SimpleGreaterThanEq) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -305,7 +332,7 @@ TEST_F(QueryVisitorTest, SimpleGreaterThan) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -344,7 +371,7 @@ TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanEqual) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -382,7 +409,7 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanEqual) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -421,7 +448,7 @@ TEST_F(QueryVisitorTest, NestedPropertyLessThan) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -443,7 +470,7 @@ TEST_F(QueryVisitorTest, IntParsingError) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -457,7 +484,7 @@ TEST_F(QueryVisitorTest, NotEqualsUnsupported) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -502,7 +529,7 @@ TEST_F(QueryVisitorTest, LessThanTooManyOperandsInvalid) {
auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -526,7 +553,7 @@ TEST_F(QueryVisitorTest, LessThanTooFewOperandsInvalid) {
auto root_node = std::make_unique<NaryOperatorNode>("<", std::move(args));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -559,7 +586,7 @@ TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -570,7 +597,7 @@ TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) {
TEST_F(QueryVisitorTest, NeverVisitedReturnsInvalid) {
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
@@ -600,7 +627,7 @@ TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -630,7 +657,7 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -638,41 +665,19 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(QueryVisitorTest, SingleTerm) {
- // Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and
- // "bar" respectively.
- Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
- TERM_MATCH_PREFIX, /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
-
- editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
- /*namespace_id=*/0);
- editor.BufferTerm("foo");
- editor.IndexAllBufferedTerms();
-
- editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
- /*namespace_id=*/0);
- editor.BufferTerm("bar");
- editor.IndexAllBufferedTerms();
-
- std::string query = "foo";
+TEST_F(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) {
+ // "price" is a STRING token, which cannot be a property name.
+ std::string query = R"("price" > 7)";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
- ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
- std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
- UnorderedElementsAre("foo"));
- EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
- EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo"));
- EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
- ElementsAre(kDocumentId1, kDocumentId0));
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
@@ -698,7 +703,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -750,7 +755,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyDisabled) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/false);
root_node->Accept(&query_visitor);
@@ -801,7 +806,7 @@ TEST_F(QueryVisitorTest, SingleVerbatimTerm) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -854,7 +859,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -900,7 +905,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -948,7 +953,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -969,7 +974,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1018,7 +1023,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1038,7 +1043,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1081,7 +1086,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1132,7 +1137,7 @@ TEST_F(QueryVisitorTest, SingleMinusTerm) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1179,7 +1184,7 @@ TEST_F(QueryVisitorTest, SingleNotTerm) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1231,7 +1236,7 @@ TEST_F(QueryVisitorTest, NestedNotTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1297,7 +1302,7 @@ TEST_F(QueryVisitorTest, DeeplyNestedNotTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1335,7 +1340,7 @@ TEST_F(QueryVisitorTest, ImplicitAndTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1373,7 +1378,7 @@ TEST_F(QueryVisitorTest, ExplicitAndTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1411,7 +1416,7 @@ TEST_F(QueryVisitorTest, OrTerms) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1451,7 +1456,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1471,7 +1476,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1490,7 +1495,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_three(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_three);
@@ -1545,7 +1550,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1564,7 +1569,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1625,7 +1630,7 @@ TEST_F(QueryVisitorTest, PropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1641,6 +1646,99 @@ TEST_F(QueryVisitorTest, PropertyFilter) {
ElementsAre(kDocumentId1, kDocumentId0));
}
+TEST_F(QueryVisitorTest, PropertyFilterStringIsInvalid) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build()));
+
+ // "prop1" is a STRING token, which cannot be a property name.
+ std::string query = R"("prop1":foo)";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ EXPECT_THAT(std::move(query_visitor).ConsumeResults(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QueryVisitorTest, PropertyFilterNonNormalized) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("PROP1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("PROP2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = "PROP1:foo";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms),
+ UnorderedElementsAre("PROP1"));
+ EXPECT_THAT(query_results.query_terms["PROP1"], UnorderedElementsAre("foo"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("foo"));
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId1, kDocumentId0));
+}
+
TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
@@ -1687,7 +1785,7 @@ TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1750,7 +1848,7 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1769,7 +1867,7 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1831,7 +1929,7 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1847,7 +1945,7 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1906,7 +2004,7 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
ParseQueryHelper(query));
QueryVisitor query_visitor(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor);
@@ -1922,7 +2020,7 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query));
QueryVisitor query_visitor_two(
index_.get(), numeric_index_.get(), document_store_.get(),
- schema_store_.get(), normalizer_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
/*needs_term_frequency_info_=*/true);
root_node->Accept(&query_visitor_two);
@@ -1935,6 +2033,91 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) {
ElementsAre(kDocumentId2));
}
+TEST_F(QueryVisitorTest, SegmentationTest) {
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("type")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build()));
+ // Section ids are assigned alphabetically.
+ SectionId prop1_section_id = 0;
+ SectionId prop2_section_id = 1;
+
+ // ICU segmentation will break this into "每天" and "上班".
+ // CFStringTokenizer (ios) will break this into "每", "天" and "上班"
+ std::string query = "每天上班";
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("上班");
+ editor.IndexAllBufferedTerms();
+ editor = index_->Edit(kDocumentId0, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ if (IsCfStringTokenization()) {
+ editor.BufferTerm("每");
+ editor.BufferTerm("天");
+ } else {
+ editor.BufferTerm("每天");
+ }
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("上班");
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
+ editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ if (IsCfStringTokenization()) {
+ editor.BufferTerm("每");
+ editor.BufferTerm("天");
+ } else {
+ editor.BufferTerm("每天");
+ }
+ editor.IndexAllBufferedTerms();
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(),
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use, IsEmpty());
+ EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre(""));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("每", "天", "上班"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("每", "天", "上班"));
+ } else {
+ EXPECT_THAT(query_results.query_terms[""],
+ UnorderedElementsAre("每天", "上班"));
+ EXPECT_THAT(ExtractKeys(query_results.query_term_iterators),
+ UnorderedElementsAre("每天", "上班"));
+ }
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ ElementsAre(kDocumentId0));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 17b2acb..47f109c 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -151,18 +151,8 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
if (search_spec.search_type() ==
SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
ICING_VLOG(1) << "Using EXPERIMENTAL_ICING_ADVANCED_QUERY parser!";
- libtextclassifier3::StatusOr<QueryResults> results_or =
- ParseAdvancedQuery(search_spec, ranking_strategy);
- if (results_or.ok()) {
- results = std::move(results_or).ValueOrDie();
- } else {
- ICING_VLOG(1)
- << "Unable to parse query using advanced query parser. Error: "
- << results_or.status().error_message()
- << ". Falling back to old query parser.";
- ICING_ASSIGN_OR_RETURN(results,
- ParseRawQuery(search_spec, ranking_strategy));
- }
+ ICING_ASSIGN_OR_RETURN(results,
+ ParseAdvancedQuery(search_spec, ranking_strategy));
} else {
ICING_ASSIGN_OR_RETURN(results,
ParseRawQuery(search_spec, ranking_strategy));
@@ -204,13 +194,17 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
document_store_.last_added_document_id());
return results;
}
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
bool needs_term_frequency_info =
ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
- QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
- &schema_store_, &normalizer_, std::move(options),
- search_spec.term_match_type(),
- needs_term_frequency_info);
+ QueryVisitor query_visitor(
+ &index_, &numeric_index_, &document_store_, &schema_store_, &normalizer_,
+ plain_tokenizer.get(), std::move(options), search_spec.term_match_type(),
+ needs_term_frequency_info);
tree_root->Accept(&query_visitor);
return std::move(query_visitor).ConsumeResults();
}
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 6d776ce..111b598 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -57,8 +57,8 @@
// $ adb push blaze-bin/icing/query/query-processor_benchmark
// /data/local/tmp/
//
-// $ adb shell /data/local/tmp/query-processor_benchmark --benchmark_filter=all
-// --adb
+// $ adb shell /data/local/tmp/query-processor_benchmark
+// --benchmark_filter=all --adb
// Flag to tell the benchmark that it'll be run on an Android device via adb,
// the benchmark will set up data files accordingly.
@@ -103,6 +103,7 @@ void BM_QueryOneTerm(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -116,7 +117,9 @@ void BM_QueryOneTerm(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -226,6 +229,7 @@ void BM_QueryFiveTerms(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -239,7 +243,9 @@ void BM_QueryFiveTerms(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -367,6 +373,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -380,7 +387,9 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -493,6 +502,7 @@ void BM_QueryHiragana(benchmark::State& state) {
Filesystem filesystem;
const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark";
const std::string index_dir = base_dir + "/index";
+ const std::string numeric_index_dir = base_dir + "/numeric_index";
const std::string schema_dir = base_dir + "/schema";
const std::string doc_store_dir = base_dir + "/store";
@@ -506,7 +516,9 @@ void BM_QueryHiragana(benchmark::State& state) {
std::unique_ptr<Index> index =
CreateIndex(icing_filesystem, filesystem, index_dir);
// TODO(b/249829533): switch to use persistent numeric index.
- auto numeric_index = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto numeric_index,
+ DummyNumericIndex<int64_t>::Create(filesystem, numeric_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index b807b14..7842a9a 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -70,7 +70,8 @@ class QueryProcessorTest
: test_dir_(GetTestTempDir() + "/icing"),
store_dir_(test_dir_ + "/store"),
schema_store_dir_(test_dir_ + "/schema_store"),
- index_dir_(test_dir_ + "/index") {}
+ index_dir_(test_dir_ + "/index"),
+ numeric_index_dir_(test_dir_ + "/numeric_index") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
@@ -104,7 +105,9 @@ class QueryProcessorTest
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// TODO(b/249829533): switch to use persistent numeric index.
- numeric_index_ = std::make_unique<DummyNumericIndex<int64_t>>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ numeric_index_,
+ DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
@@ -154,6 +157,7 @@ class QueryProcessorTest
private:
IcingFilesystem icing_filesystem_;
const std::string index_dir_;
+ const std::string numeric_index_dir_;
protected:
std::unique_ptr<Index> index_;
@@ -223,17 +227,25 @@ TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
SearchSpecProto search_spec;
search_spec.set_query("()");
search_spec.set_search_type(GetParam());
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(search_spec,
+ ScoringSpecProto::RankingStrategy::NONE));
- ICING_ASSERT_OK_AND_ASSIGN(
- QueryResults results,
- query_processor_->ParseSearch(search_spec,
- ScoringSpecProto::RankingStrategy::NONE));
-
- // Descending order of valid DocumentIds
- EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
- ElementsAre(document_id2, document_id1));
- EXPECT_THAT(results.query_terms, IsEmpty());
- EXPECT_THAT(results.query_term_iterators, IsEmpty());
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
+ ElementsAre(document_id2, document_id1));
+ EXPECT_THAT(results.query_terms, IsEmpty());
+ EXPECT_THAT(results.query_term_iterators, IsEmpty());
+ } else {
+ // TODO(b/208654892): Resolve the difference between RAW_QUERY and ADVANCED
+ // regarding empty composite expressions.
+ EXPECT_THAT(query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::NONE),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ }
}
TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.cc b/icing/scoring/advanced_scoring/advanced-scorer.cc
index 212a476..771615c 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer.cc
@@ -30,7 +30,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>>
AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
double default_score,
const DocumentStore* document_store,
- const SchemaStore* schema_store) {
+ const SchemaStore* schema_store,
+ const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -48,15 +49,14 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec,
std::make_unique<Bm25fCalculator>(document_store,
std::move(section_weights));
ScoringVisitor visitor(default_score, document_store, schema_store,
- bm25f_calculator.get());
+ bm25f_calculator.get(), join_children_fetcher);
tree_root->Accept(&visitor);
ICING_ASSIGN_OR_RETURN(std::unique_ptr<ScoreExpression> expression,
std::move(visitor).Expression());
- if (expression->is_document_type()) {
+ if (expression->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
- "The root scoring expression will always be evaluated to a document, "
- "but a number is expected.");
+ "The root scoring expression is not of double type.");
}
return std::unique_ptr<AdvancedScorer>(new AdvancedScorer(
std::move(expression), std::move(bm25f_calculator), default_score));
diff --git a/icing/scoring/advanced_scoring/advanced-scorer.h b/icing/scoring/advanced_scoring/advanced-scorer.h
index 763499b..1a1cd5c 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer.h
+++ b/icing/scoring/advanced_scoring/advanced-scorer.h
@@ -20,6 +20,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/advanced_scoring/score-expression.h"
#include "icing/scoring/bm25f-calculator.h"
@@ -37,7 +38,8 @@ class AdvancedScorer : public Scorer {
// INVALID_ARGUMENT if fails to create an instance
static libtextclassifier3::StatusOr<std::unique_ptr<AdvancedScorer>> Create(
const ScoringSpecProto& scoring_spec, double default_score,
- const DocumentStore* document_store, const SchemaStore* schema_store);
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
double GetScore(const DocHitInfo& hit_info,
const DocHitInfoIterator* query_it) override {
diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc
index a8749df..6393645 100644
--- a/icing/scoring/advanced_scoring/score-expression.cc
+++ b/icing/scoring/advanced_scoring/score-expression.cc
@@ -17,6 +17,18 @@
namespace icing {
namespace lib {
+namespace {
+
+libtextclassifier3::Status CheckChildrenNotNull(
+ const std::vector<std::unique_ptr<ScoreExpression>>& children) {
+ for (const auto& child : children) {
+ ICING_RETURN_ERROR_IF_NULL(child);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace
+
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
OperatorScoreExpression::Create(
OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children) {
@@ -24,12 +36,13 @@ OperatorScoreExpression::Create(
return absl_ports::InvalidArgumentError(
"OperatorScoreExpression must have at least one argument.");
}
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+
bool children_all_constant_double = true;
for (const auto& child : children) {
- ICING_RETURN_ERROR_IF_NULL(child);
- if (child->is_document_type()) {
+ if (child->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
- "Operators are not supported for document type.");
+ "Operators are only supported for double type.");
}
if (!child->is_constant_double()) {
children_all_constant_double = false;
@@ -54,7 +67,7 @@ OperatorScoreExpression::Create(
}
libtextclassifier3::StatusOr<double> OperatorScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
// The Create factory guarantees that an operator will have at least one
// child.
ICING_ASSIGN_OR_RETURN(double res, children_.at(0)->eval(hit_info, query_it));
@@ -97,6 +110,10 @@ const std::unordered_map<std::string, MathFunctionScoreExpression::FunctionType>
{"sin", FunctionType::kSin}, {"cos", FunctionType::kCos},
{"tan", FunctionType::kTan}};
+const std::unordered_set<MathFunctionScoreExpression::FunctionType>
+ MathFunctionScoreExpression::kVariableArgumentsFunctions = {
+ FunctionType::kMax, FunctionType::kMin};
+
libtextclassifier3::StatusOr<std::unique_ptr<ScoreExpression>>
MathFunctionScoreExpression::Create(
FunctionType function_type,
@@ -105,12 +122,26 @@ MathFunctionScoreExpression::Create(
return absl_ports::InvalidArgumentError(
"Math functions must have at least one argument.");
}
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+
+ // Received a list type in the function argument.
+ if (children.size() == 1 &&
+ children[0]->type() == ScoreExpressionType::kDoubleList) {
+ // Only certain functions support list type.
+ if (kVariableArgumentsFunctions.count(function_type) > 0) {
+ return std::unique_ptr<MathFunctionScoreExpression>(
+ new MathFunctionScoreExpression(function_type, std::move(children)));
+ }
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Received an unsupported list type argument in the math function."));
+ }
+
bool children_all_constant_double = true;
for (const auto& child : children) {
- ICING_RETURN_ERROR_IF_NULL(child);
- if (child->is_document_type()) {
+ if (child->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
- "Math functions are not supported for document type.");
+ "Got an invalid type for the math function. Should expect a double "
+ "type argument.");
}
if (!child->is_constant_double()) {
children_all_constant_double = false;
@@ -172,11 +203,16 @@ MathFunctionScoreExpression::Create(
}
libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
std::vector<double> values;
- for (const auto& child : children_) {
- ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it));
- values.push_back(v);
+ if (children_.at(0)->type() == ScoreExpressionType::kDoubleList) {
+ ICING_ASSIGN_OR_RETURN(values,
+ children_.at(0)->eval_list(hit_info, query_it));
+ } else {
+ for (const auto& child : children_) {
+ ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it));
+ values.push_back(v);
+ }
}
double res = 0;
@@ -194,9 +230,17 @@ libtextclassifier3::StatusOr<double> MathFunctionScoreExpression::eval(
res = pow(values[0], values[1]);
break;
case FunctionType::kMax:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in max function");
+ }
res = *std::max_element(values.begin(), values.end());
break;
case FunctionType::kMin:
+ if (values.empty()) {
+ return absl_ports::InvalidArgumentError(
+ "Got an empty parameter set in min function");
+ }
res = *std::min_element(values.begin(), values.end());
break;
case FunctionType::kSqrt:
@@ -240,10 +284,9 @@ DocumentFunctionScoreExpression::Create(
return absl_ports::InvalidArgumentError(
"Document-based functions must have at least one argument.");
}
- for (const auto& child : children) {
- ICING_RETURN_ERROR_IF_NULL(child);
- }
- if (!children[0]->is_document_type()) {
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+
+ if (children[0]->type() != ScoreExpressionType::kDocument) {
return absl_ports::InvalidArgumentError(
"The first parameter of document-based functions must be \"this\".");
}
@@ -259,7 +302,8 @@ DocumentFunctionScoreExpression::Create(
case FunctionType::kUsageCount:
[[fallthrough]];
case FunctionType::kUsageLastUsedTimestamp:
- if (children.size() != 2 || children[1]->is_document_type()) {
+ if (children.size() != 2 ||
+ children[1]->type() != ScoreExpressionType::kDouble) {
return absl_ports::InvalidArgumentError(
"UsageCount/UsageLastUsedTimestamp must have 2 arguments. The "
"first argument should be \"this\", and the second argument "
@@ -273,7 +317,7 @@ DocumentFunctionScoreExpression::Create(
}
libtextclassifier3::StatusOr<double> DocumentFunctionScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
switch (function_type_) {
case FunctionType::kDocumentScore:
[[fallthrough]];
@@ -330,8 +374,9 @@ RelevanceScoreFunctionScoreExpression::Create(
return absl_ports::InvalidArgumentError(
"relevanceScore must have 1 argument.");
}
- ICING_RETURN_ERROR_IF_NULL(children[0]);
- if (!children[0]->is_document_type()) {
+ ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children));
+
+ if (children[0]->type() != ScoreExpressionType::kDocument) {
return absl_ports::InvalidArgumentError(
"relevanceScore must take \"this\" as its argument.");
}
@@ -342,7 +387,7 @@ RelevanceScoreFunctionScoreExpression::Create(
libtextclassifier3::StatusOr<double>
RelevanceScoreFunctionScoreExpression::eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) {
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
if (query_it == nullptr) {
return default_score_;
}
diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h
index f80da33..047a175 100644
--- a/icing/scoring/advanced_scoring/score-expression.h
+++ b/icing/scoring/advanced_scoring/score-expression.h
@@ -19,6 +19,7 @@
#include <cmath>
#include <memory>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -31,6 +32,12 @@
namespace icing {
namespace lib {
+enum class ScoreExpressionType {
+ kDouble,
+ kDoubleList,
+ kDocument // Only "this" is considered as document type.
+};
+
class ScoreExpression {
public:
virtual ~ScoreExpression() = default;
@@ -43,10 +50,32 @@ class ScoreExpression {
// expression.
// - INTERNAL if there are inconsistencies.
virtual libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) = 0;
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (type() == ScoreExpressionType::kDouble) {
+ return absl_ports::UnimplementedError(
+ "All ScoreExpressions of type Double must provide their own "
+ "implementation of eval!");
+ }
+ return absl_ports::InternalError(
+ "Runtime type error: the expression should never be evaluated to a "
+ "double. There must be inconsistencies in the static type checking.");
+ }
- // Indicate whether the current expression is of document type
- virtual bool is_document_type() const { return false; }
+ virtual libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const {
+ if (type() == ScoreExpressionType::kDoubleList) {
+ return absl_ports::UnimplementedError(
+ "All ScoreExpressions of type Double List must provide their own "
+ "implementation of eval_list!");
+ }
+ return absl_ports::InternalError(
+ "Runtime type error: the expression should never be evaluated to a "
+ "double list. There must be inconsistencies in the static type "
+ "checking.");
+ }
+
+ // Indicate the type to which the current expression will be evaluated.
+ virtual ScoreExpressionType type() const = 0;
// Indicate whether the current expression is a constant double.
// Returns true if and only if the object is of ConstantScoreExpression type.
@@ -59,15 +88,10 @@ class ThisExpression : public ScoreExpression {
return std::unique_ptr<ThisExpression>(new ThisExpression());
}
- libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override {
- return absl_ports::InternalError(
- "Should never reach here to evaluate a document type as double. "
- "There must be inconsistencies.");
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDocument;
}
- bool is_document_type() const override { return true; }
-
private:
ThisExpression() = default;
};
@@ -81,10 +105,14 @@ class ConstantScoreExpression : public ScoreExpression {
}
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo&, const DocHitInfoIterator*) override {
+ const DocHitInfo&, const DocHitInfoIterator*) const override {
return c_;
}
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
bool is_constant_double() const override { return true; }
private:
@@ -107,7 +135,12 @@ class OperatorScoreExpression : public ScoreExpression {
OperatorType op, std::vector<std::unique_ptr<ScoreExpression>> children);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit OperatorScoreExpression(
@@ -134,6 +167,8 @@ class MathFunctionScoreExpression : public ScoreExpression {
static const std::unordered_map<std::string, FunctionType> kFunctionNames;
+ static const std::unordered_set<FunctionType> kVariableArgumentsFunctions;
+
// RETURNS:
// - A MathFunctionScoreExpression instance on success if not simplifiable.
// - A ConstantScoreExpression instance on success if simplifiable.
@@ -144,7 +179,12 @@ class MathFunctionScoreExpression : public ScoreExpression {
std::vector<std::unique_ptr<ScoreExpression>> children);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit MathFunctionScoreExpression(
@@ -178,7 +218,12 @@ class DocumentFunctionScoreExpression : public ScoreExpression {
const DocumentStore* document_store, double default_score);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit DocumentFunctionScoreExpression(
@@ -210,7 +255,12 @@ class RelevanceScoreFunctionScoreExpression : public ScoreExpression {
Bm25fCalculator* bm25f_calculator, double default_score);
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override;
+ const DocHitInfo& hit_info,
+ const DocHitInfoIterator* query_it) const override;
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
private:
explicit RelevanceScoreFunctionScoreExpression(
diff --git a/icing/scoring/advanced_scoring/score-expression_test.cc b/icing/scoring/advanced_scoring/score-expression_test.cc
index b49b658..07c9d76 100644
--- a/icing/scoring/advanced_scoring/score-expression_test.cc
+++ b/icing/scoring/advanced_scoring/score-expression_test.cc
@@ -39,13 +39,39 @@ class NonConstantScoreExpression : public ScoreExpression {
}
libtextclassifier3::StatusOr<double> eval(
- const DocHitInfo&, const DocHitInfoIterator*) override {
+ const DocHitInfo &, const DocHitInfoIterator *) const override {
return 0;
}
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDouble;
+ }
+
bool is_constant_double() const override { return false; }
};
+class ListScoreExpression : public ScoreExpression {
+ public:
+ static std::unique_ptr<ListScoreExpression> Create(
+ const std::vector<double> &values) {
+ std::unique_ptr<ListScoreExpression> res =
+ std::make_unique<ListScoreExpression>();
+ res->values = values;
+ return res;
+ }
+
+ libtextclassifier3::StatusOr<std::vector<double>> eval_list(
+ const DocHitInfo &, const DocHitInfoIterator *) const override {
+ return values;
+ }
+
+ ScoreExpressionType type() const override {
+ return ScoreExpressionType::kDoubleList;
+ }
+
+ std::vector<double> values;
+};
+
template <typename... Args>
std::vector<std::unique_ptr<ScoreExpression>> MakeChildren(Args... args) {
std::vector<std::unique_ptr<ScoreExpression>> children;
@@ -180,6 +206,82 @@ TEST(ScoreExpressionTest, CannotSimplifyNonConstant) {
ASSERT_FALSE(expression->is_constant_double());
}
+TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgument) {
+ // max({1, 2, 3}) = 3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3)));
+
+ // min({1, 2, 3}) = 1
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({1, 2, 3}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1)));
+
+ // max({4}) = 4
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({4}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(4)));
+
+ // min({5}) = 5
+ ICING_ASSERT_OK_AND_ASSIGN(
+ expression, MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({5}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(5)));
+}
+
+TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgumentError) {
+ // max({}) = evaluation error, since max on empty list does not produce a
+ // valid result.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoreExpression> expression,
+ MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ListScoreExpression::Create({}))));
+ EXPECT_THAT(expression->eval(DocHitInfo(), nullptr),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // max(1, {2}) = type error, since max must take either n > 0 parameters of
+ // type double, or a single parameter of type list.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMax,
+ MakeChildren(ConstantScoreExpression::Create(1),
+ ListScoreExpression::Create({2}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // min({1}, {2}) = type error, since min must take either n > 0 parameters of
+ // type double, or a single parameter of type list.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kMin,
+ MakeChildren(ListScoreExpression::Create({1}),
+ ListScoreExpression::Create({2}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // sin({1}) = type error, since sin does not support list type parameters.
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kSin,
+ MakeChildren(ListScoreExpression::Create({1}))),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(ScoreExpressionTest, ChildrenCannotBeNull) {
+ EXPECT_THAT(OperatorScoreExpression::Create(
+ OperatorScoreExpression::OperatorType::kPlus,
+ MakeChildren(ConstantScoreExpression::Create(1), nullptr)),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(MathFunctionScoreExpression::Create(
+ MathFunctionScoreExpression::FunctionType::kPow,
+ MakeChildren(ConstantScoreExpression::Create(2), nullptr)),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/scoring/advanced_scoring/scoring-visitor.h b/icing/scoring/advanced_scoring/scoring-visitor.h
index 539af2d..9b01f73 100644
--- a/icing/scoring/advanced_scoring/scoring-visitor.h
+++ b/icing/scoring/advanced_scoring/scoring-visitor.h
@@ -17,6 +17,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/scoring.pb.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
@@ -32,11 +33,13 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
explicit ScoringVisitor(double default_score,
const DocumentStore* document_store,
const SchemaStore* schema_store,
- Bm25fCalculator* bm25f_calculator)
+ Bm25fCalculator* bm25f_calculator,
+ const JoinChildrenFetcher* join_children_fetcher)
: default_score_(default_score),
document_store_(*document_store),
schema_store_(*schema_store),
- bm25f_calculator_(*bm25f_calculator) {}
+ bm25f_calculator_(*bm25f_calculator),
+ join_children_fetcher_(join_children_fetcher) {}
void VisitFunctionName(const FunctionNameNode* node) override;
void VisitString(const StringNode* node) override;
@@ -86,6 +89,8 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor {
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
Bm25fCalculator& bm25f_calculator_;
+ // A non-null join_children_fetcher_ indicates scoring in a join.
+ const JoinChildrenFetcher* join_children_fetcher_; // Does not own.
libtextclassifier3::Status pending_error_;
std::vector<std::unique_ptr<ScoreExpression>> stack;
diff --git a/icing/scoring/scorer-factory.cc b/icing/scoring/scorer-factory.cc
index f75b564..c647642 100644
--- a/icing/scoring/scorer-factory.cc
+++ b/icing/scoring/scorer-factory.cc
@@ -163,7 +163,8 @@ namespace scorer_factory {
libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
const ScoringSpecProto& scoring_spec, double default_score,
- const DocumentStore* document_store, const SchemaStore* schema_store) {
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -211,7 +212,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
"Advanced scoring is enabled, but the expression is empty!");
}
return AdvancedScorer::Create(scoring_spec, default_score, document_store,
- schema_store);
+ schema_store, join_children_fetcher);
case ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE:
// Use join aggregate score to rank. Since the aggregation score is
// calculated by child documents after joining (in JoinProcessor), we can
diff --git a/icing/scoring/scorer-factory.h b/icing/scoring/scorer-factory.h
index 8c19c75..460e5bb 100644
--- a/icing/scoring/scorer-factory.h
+++ b/icing/scoring/scorer-factory.h
@@ -16,6 +16,7 @@
#define ICING_SCORING_SCORER_FACTORY_H_
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/scoring/scorer.h"
#include "icing/store/document-store.h"
@@ -36,7 +37,8 @@ namespace scorer_factory {
// INVALID_ARGUMENT if fails to create an instance
libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
const ScoringSpecProto& scoring_spec, double default_score,
- const DocumentStore* document_store, const SchemaStore* schema_store);
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
} // namespace scorer_factory
diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc
index 571a112..894852d 100644
--- a/icing/scoring/scoring-processor.cc
+++ b/icing/scoring/scoring-processor.cc
@@ -43,7 +43,8 @@ constexpr double kDefaultScoreInAscendingOrder =
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
const DocumentStore* document_store,
- const SchemaStore* schema_store) {
+ const SchemaStore* schema_store,
+ const JoinChildrenFetcher* join_children_fetcher) {
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -52,11 +53,11 @@ ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<Scorer> scorer,
- scorer_factory::Create(scoring_spec,
- is_descending_order
- ? kDefaultScoreInDescendingOrder
- : kDefaultScoreInAscendingOrder,
- document_store, schema_store));
+ scorer_factory::Create(
+ scoring_spec,
+ is_descending_order ? kDefaultScoreInDescendingOrder
+ : kDefaultScoreInAscendingOrder,
+ document_store, schema_store, join_children_fetcher));
// Using `new` to access a non-public constructor.
return std::unique_ptr<ScoringProcessor>(
new ScoringProcessor(std::move(scorer)));
diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h
index e7d09b1..9cd4d85 100644
--- a/icing/scoring/scoring-processor.h
+++ b/icing/scoring/scoring-processor.h
@@ -21,6 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/proto/scoring.pb.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/scoring/scorer.h"
@@ -41,7 +42,8 @@ class ScoringProcessor {
// FAILED_PRECONDITION on any null pointer input
static libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> Create(
const ScoringSpecProto& scoring_spec, const DocumentStore* document_store,
- const SchemaStore* schema_store);
+ const SchemaStore* schema_store,
+ const JoinChildrenFetcher* join_children_fetcher = nullptr);
// Assigns scores to DocHitInfos from the given DocHitInfoIterator and returns
// a vector of ScoredDocumentHits. The size of results is no more than
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 9e79790..2a7e108 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -1873,8 +1873,7 @@ libtextclassifier3::Status DocumentStore::SetUsageScores(
libtextclassifier3::StatusOr<
google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
DocumentStore::CollectCorpusInfo() const {
- google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>
- corpus_info;
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo> corpus_info;
libtextclassifier3::StatusOr<const SchemaProto*> schema_proto_or =
schema_store_->GetSchema();
if (!schema_proto_or.ok()) {
@@ -1919,10 +1918,10 @@ DocumentStore::GetDebugInfo(int verbosity) const {
ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum());
debug_info.set_crc(crc.Get());
if (verbosity > 0) {
- ICING_ASSIGN_OR_RETURN(google::protobuf::RepeatedPtrField<
- DocumentDebugInfoProto::CorpusInfo>
- corpus_info,
- CollectCorpusInfo());
+ ICING_ASSIGN_OR_RETURN(
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>
+ corpus_info,
+ CollectCorpusInfo());
*debug_info.mutable_corpus_info() = std::move(corpus_info);
}
return debug_info;
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index bda351d..3e02636 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -730,8 +730,8 @@ class DocumentStore {
// Returns:
// - on success, a RepeatedPtrField for CorpusInfo collected.
// - OUT_OF_RANGE, this should never happen.
- libtextclassifier3::StatusOr<google::protobuf::RepeatedPtrField<
- DocumentDebugInfoProto::CorpusInfo>>
+ libtextclassifier3::StatusOr<
+ google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
CollectCorpusInfo() const;
};
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 7cf951a..a115e11 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -88,7 +88,9 @@ const NamespaceStorageInfoProto& GetNamespaceStorageInfo(
// Didn't find our namespace, fail the test.
EXPECT_TRUE(false) << "Failed to find namespace '" << name_space
<< "' in DocumentStorageInfoProto.";
- return std::move(NamespaceStorageInfoProto());
+ static const auto& default_namespace_storage_info =
+ *new NamespaceStorageInfoProto();
+ return default_namespace_storage_info;
}
UsageReport CreateUsageReport(std::string name_space, std::string uri,
diff --git a/icing/store/key-mapper_benchmark.cc b/icing/store/key-mapper_benchmark.cc
index b649bc7..1ce54c7 100644
--- a/icing/store/key-mapper_benchmark.cc
+++ b/icing/store/key-mapper_benchmark.cc
@@ -35,6 +35,7 @@ namespace lib {
namespace {
using ::testing::Eq;
+using ::testing::IsTrue;
using ::testing::Not;
class KeyMapperBenchmark {
@@ -78,8 +79,10 @@ class KeyMapperBenchmark {
template <>
libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<int>>>
CreateKeyMapper<PersistentHashMapKeyMapper<int>>(int max_num_entries) {
+ std::string working_path =
+ absl_ports::StrCat(base_dir, "/", "key_mapper_dir");
return PersistentHashMapKeyMapper<int>::Create(
- filesystem, base_dir, max_num_entries,
+ filesystem, std::move(working_path), max_num_entries,
/*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int),
/*max_load_factor_percent=*/100);
}
@@ -109,6 +112,7 @@ void BM_PutMany(benchmark::State& state) {
state.PauseTiming();
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<KeyMapper<int>> key_mapper,
benchmark.CreateKeyMapper<KeyMapperType>(num_keys));
@@ -166,6 +170,7 @@ void BM_Put(benchmark::State& state) {
KeyMapperBenchmark benchmark;
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
// The overhead of state.PauseTiming is too large and affects the benchmark
// result a lot, so pre-generate enough kvps to avoid calling too many times
@@ -206,6 +211,7 @@ void BM_Get(benchmark::State& state) {
KeyMapperBenchmark benchmark;
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
// Create a key mapper with num_keys entries.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -260,6 +266,7 @@ void BM_Iterator(benchmark::State& state) {
KeyMapperBenchmark benchmark;
benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str());
DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir);
+ ASSERT_THAT(ddir.is_valid(), IsTrue());
// Create a key mapper with num_keys entries.
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/store/key-mapper_test.cc b/icing/store/key-mapper_test.cc
index 682888d..1367c2d 100644
--- a/icing/store/key-mapper_test.cc
+++ b/icing/store/key-mapper_test.cc
@@ -32,6 +32,7 @@
#include "icing/testing/tmp-directory.h"
using ::testing::IsEmpty;
+using ::testing::IsTrue;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
@@ -47,7 +48,13 @@ class KeyMapperTest : public ::testing::Test {
protected:
using KeyMapperType = T;
- void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/icing";
+ ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+ IsTrue());
+
+ working_dir_ = base_dir_ + "/key_mapper";
+ }
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
@@ -63,17 +70,18 @@ class KeyMapperTest : public ::testing::Test {
libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>>
CreateKeyMapper<DynamicTrieKeyMapper<DocumentId>>() {
return DynamicTrieKeyMapper<DocumentId>::Create(
- filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize);
+ filesystem_, working_dir_, kMaxDynamicTrieKeyMapperSize);
}
template <>
libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>>
CreateKeyMapper<PersistentHashMapKeyMapper<DocumentId>>() {
return PersistentHashMapKeyMapper<DocumentId>::Create(filesystem_,
- base_dir_);
+ working_dir_);
}
std::string base_dir_;
+ std::string working_dir_;
Filesystem filesystem_;
};
@@ -175,15 +183,15 @@ TYPED_TEST(KeyMapperTest, CanUseAcrossMultipleInstances) {
TYPED_TEST(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
// Can delete even if there's nothing there
- ICING_EXPECT_OK(
- TestFixture::KeyMapperType::Delete(this->filesystem_, this->base_dir_));
+ ICING_EXPECT_OK(TestFixture::KeyMapperType::Delete(this->filesystem_,
+ this->working_dir_));
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
this->template CreateKeyMapper<TypeParam>());
ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
ICING_EXPECT_OK(key_mapper->PersistToDisk());
- ICING_EXPECT_OK(
- TestFixture::KeyMapperType::Delete(this->filesystem_, this->base_dir_));
+ ICING_EXPECT_OK(TestFixture::KeyMapperType::Delete(this->filesystem_,
+ this->working_dir_));
key_mapper.reset();
ICING_ASSERT_OK_AND_ASSIGN(key_mapper,
diff --git a/icing/store/persistent-hash-map-key-mapper.h b/icing/store/persistent-hash-map-key-mapper.h
index a13ec11..5f83e6f 100644
--- a/icing/store/persistent-hash-map-key-mapper.h
+++ b/icing/store/persistent-hash-map-key-mapper.h
@@ -43,11 +43,13 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
// Returns any encountered IO errors.
//
// filesystem: Object to make system level calls
- // base_dir : Base directory used to save all the files required to persist
- // PersistentHashMapKeyMapper. If this base_dir was previously used
- // to create a PersistentHashMapKeyMapper, then this existing data
- // would be loaded. Otherwise, an empty PersistentHashMapKeyMapper
- // would be created.
+ // working_path: Working directory used to save all the files required to
+ // persist PersistentHashMapKeyMapper. If this working_path was
+ // previously used to create a PersistentHashMapKeyMapper, then
+ // this existing data would be loaded. Otherwise, an empty
+ // PersistentHashMapKeyMapper would be created. See
+ // PersistentStorage for more details about the concept of
+ // working_path.
// max_num_entries: max # of kvps. It will be used to compute 3 storages size.
// average_kv_byte_size: average byte size of a single key + serialized value.
// It will be used to compute kv_storage size.
@@ -60,24 +62,25 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
// considered valid.
static libtextclassifier3::StatusOr<
std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
- Create(const Filesystem& filesystem, std::string_view base_dir,
+ Create(const Filesystem& filesystem, std::string working_path,
int32_t max_num_entries = PersistentHashMap::Entry::kMaxNumEntries,
int32_t average_kv_byte_size =
PersistentHashMap::Options::kDefaultAverageKVByteSize,
int32_t max_load_factor_percent =
PersistentHashMap::Options::kDefaultMaxLoadFactorPercent);
- // Deletes all the files associated with the PersistentHashMapKeyMapper.
+ // Deletes working_path (and all the files under it recursively) associated
+ // with the PersistentHashMapKeyMapper.
//
- // base_dir : Base directory used to save all the files required to persist
- // PersistentHashMapKeyMapper. Should be the same as passed into
- // Create().
+ // working_path: Working directory used to save all the files required to
+ // persist PersistentHashMapKeyMapper. Should be the same as
+ // passed into Create().
//
// Returns:
// OK on success
// INTERNAL_ERROR on I/O error
static libtextclassifier3::Status Delete(const Filesystem& filesystem,
- std::string_view base_dir);
+ const std::string& working_path);
~PersistentHashMapKeyMapper() override = default;
@@ -122,7 +125,7 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
}
libtextclassifier3::StatusOr<Crc32> ComputeChecksum() override {
- return persistent_hash_map_->ComputeChecksum();
+ return persistent_hash_map_->UpdateChecksums();
}
private:
@@ -147,8 +150,6 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
PersistentHashMap::Iterator itr_;
};
- static constexpr std::string_view kKeyMapperDir = "key_mapper_dir";
-
// Use PersistentHashMapKeyMapper::Create() to instantiate.
explicit PersistentHashMapKeyMapper(
std::unique_ptr<PersistentHashMap> persistent_hash_map)
@@ -164,21 +165,13 @@ template <typename T, typename Formatter>
/* static */ libtextclassifier3::StatusOr<
std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
PersistentHashMapKeyMapper<T, Formatter>::Create(
- const Filesystem& filesystem, std::string_view base_dir,
+ const Filesystem& filesystem, std::string working_path,
int32_t max_num_entries, int32_t average_kv_byte_size,
int32_t max_load_factor_percent) {
- const std::string key_mapper_dir =
- absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to create PersistentHashMapKeyMapper directory: ",
- key_mapper_dir));
- }
-
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(
- filesystem, key_mapper_dir,
+ filesystem, std::move(working_path),
PersistentHashMap::Options(
/*value_type_size_in=*/sizeof(T),
/*max_num_entries_in=*/max_num_entries,
@@ -191,16 +184,9 @@ PersistentHashMapKeyMapper<T, Formatter>::Create(
template <typename T, typename Formatter>
/* static */ libtextclassifier3::Status
-PersistentHashMapKeyMapper<T, Formatter>::Delete(const Filesystem& filesystem,
- std::string_view base_dir) {
- const std::string key_mapper_dir =
- absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
- if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to delete PersistentHashMapKeyMapper directory: ",
- key_mapper_dir));
- }
- return libtextclassifier3::Status::OK;
+PersistentHashMapKeyMapper<T, Formatter>::Delete(
+ const Filesystem& filesystem, const std::string& working_path) {
+ return PersistentHashMap::Discard(filesystem, working_path);
}
} // namespace lib
diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc
index aceb11d..f32e541 100644
--- a/icing/transform/icu/icu-normalizer.cc
+++ b/icing/transform/icu/icu-normalizer.cc
@@ -134,17 +134,16 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const {
ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance";
}
- // Checks if the first character is within ASCII range or can be transformed
- // into an ASCII char. Since the term is tokenized, we know that the whole
- // term can be transformed into ASCII if the first character can.
- UChar32 first_uchar32 =
- i18n_utils::GetUChar32At(term.data(), term.length(), 0);
- if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 &&
- DiacriticCharToAscii(normalizer2, first_uchar32, nullptr)) {
- // This is a faster method to normalize Latin terms.
- normalized_text = NormalizeLatin(normalizer2, term);
- } else {
- normalized_text = term_transformer_->Transform(term);
+ // Normalize the prefix that can be transformed into ASCII.
+ // This is a faster method to normalize Latin terms.
+ NormalizeLatinResult result = NormalizeLatin(normalizer2, term);
+ normalized_text = std::move(result.text);
+ if (result.end_pos < term.length()) {
+ // Some portion of term couldn't be normalized via NormalizeLatin. Use
+ // term_transformer to handle this portion.
+ std::string_view rest_term = term.substr(result.end_pos);
+ absl_ports::StrAppend(&normalized_text,
+ term_transformer_->Transform(rest_term));
}
if (normalized_text.length() > max_term_byte_size_) {
@@ -154,40 +153,32 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const {
return normalized_text;
}
-std::string IcuNormalizer::NormalizeLatin(const UNormalizer2* normalizer2,
- const std::string_view term) const {
- std::string result;
- result.reserve(term.length());
- int current_pos = 0;
- while (current_pos < term.length()) {
- if (i18n_utils::IsAscii(term[current_pos])) {
- result.push_back(std::tolower(term[current_pos]));
- ++current_pos;
+IcuNormalizer::NormalizeLatinResult IcuNormalizer::NormalizeLatin(
+ const UNormalizer2* normalizer2, const std::string_view term) const {
+ NormalizeLatinResult result = {};
+ if (normalizer2 == nullptr) {
+ return result;
+ }
+ CharacterIterator char_itr(term);
+ result.text.reserve(term.length());
+ char ascii_char;
+ while (char_itr.utf8_index() < term.length()) {
+ UChar32 c = char_itr.GetCurrentChar();
+ if (i18n_utils::IsAscii(c)) {
+ result.text.push_back(std::tolower(c));
+ } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) {
+ result.text.push_back(std::tolower(ascii_char));
} else {
- UChar32 uchar32 =
- i18n_utils::GetUChar32At(term.data(), term.length(), current_pos);
- if (uchar32 == i18n_utils::kInvalidUChar32) {
- ICING_LOG(WARNING) << "Unable to get uchar32 from " << term
- << " at position" << current_pos;
- current_pos += i18n_utils::GetUtf8Length(uchar32);
- continue;
- }
- char ascii_char;
- if (DiacriticCharToAscii(normalizer2, uchar32, &ascii_char)) {
- result.push_back(std::tolower(ascii_char));
- } else {
- // We don't know how to transform / decompose this Unicode character, it
- // probably means that some other Unicode characters are mixed with
- // Latin characters. This shouldn't happen if input term is properly
- // tokenized. We handle it here in case there're something wrong with
- // the tokenizers.
- int utf8_length = i18n_utils::GetUtf8Length(uchar32);
- absl_ports::StrAppend(&result, term.substr(current_pos, utf8_length));
- }
- current_pos += i18n_utils::GetUtf8Length(uchar32);
+ // We don't know how to transform / decompose this Unicode character. It
+ // probably means that some other Unicode characters are mixed with Latin
+ // characters. We return the partial result here and let the caller handle
+ // the rest.
+ result.end_pos = char_itr.utf8_index();
+ return result;
}
+ char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
}
-
+ result.end_pos = term.length();
return result;
}
@@ -267,10 +258,13 @@ std::string IcuNormalizer::TermTransformer::Transform(
return std::move(utf8_term_or).ValueOrDie();
}
-CharacterIterator FindNormalizedLatinMatchEndPosition(
+bool IcuNormalizer::FindNormalizedLatinMatchEndPosition(
const UNormalizer2* normalizer2, std::string_view term,
- CharacterIterator char_itr, std::string_view normalized_term) {
- CharacterIterator normalized_char_itr(normalized_term);
+ CharacterIterator& char_itr, std::string_view normalized_term,
+ CharacterIterator& normalized_char_itr) const {
+ if (normalizer2 == nullptr) {
+ return false;
+ }
char ascii_char;
while (char_itr.utf8_index() < term.length() &&
normalized_char_itr.utf8_index() < normalized_term.length()) {
@@ -278,16 +272,18 @@ CharacterIterator FindNormalizedLatinMatchEndPosition(
if (i18n_utils::IsAscii(c)) {
c = std::tolower(c);
} else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) {
- c = ascii_char;
+ c = std::tolower(ascii_char);
+ } else {
+ return false;
}
UChar32 normalized_c = normalized_char_itr.GetCurrentChar();
if (c != normalized_c) {
- return char_itr;
+ return true;
}
char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1);
normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1);
}
- return char_itr;
+ return true;
}
CharacterIterator
@@ -357,15 +353,18 @@ CharacterIterator IcuNormalizer::FindNormalizedMatchEndPosition(
}
CharacterIterator char_itr(term);
- UChar32 first_uchar32 = char_itr.GetCurrentChar();
- if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 &&
- DiacriticCharToAscii(normalizer2, first_uchar32, /*char_out=*/nullptr)) {
- return FindNormalizedLatinMatchEndPosition(normalizer2, term, char_itr,
- normalized_term);
- } else {
- return term_transformer_->FindNormalizedNonLatinMatchEndPosition(
- term, char_itr, normalized_term);
+ CharacterIterator normalized_char_itr(normalized_term);
+ if (FindNormalizedLatinMatchEndPosition(
+ normalizer2, term, char_itr, normalized_term, normalized_char_itr)) {
+ return char_itr;
}
+ // Some portion of term couldn't be normalized via
+ // FindNormalizedLatinMatchEndPosition. Use term_transformer to handle this
+ // portion.
+ std::string_view rest_normalized_term =
+ normalized_term.substr(normalized_char_itr.utf8_index());
+ return term_transformer_->FindNormalizedNonLatinMatchEndPosition(
+ term, char_itr, rest_normalized_term);
}
} // namespace lib
diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h
index d4f1ebd..7c64506 100644
--- a/icing/transform/icu/icu-normalizer.h
+++ b/icing/transform/icu/icu-normalizer.h
@@ -101,14 +101,36 @@ class IcuNormalizer : public Normalizer {
UTransliterator* u_transliterator_;
};
+ struct NormalizeLatinResult {
+ // A string representing the maximum prefix of term (can be empty or term
+ // itself) that can be normalized into ASCII.
+ std::string text;
+ // The UTF-8 byte offset within term of the first char that normalization
+ // failed to transform into an ASCII char, or term.length() if all chars can
+ // be transformed.
+ size_t end_pos;
+ };
+
explicit IcuNormalizer(std::unique_ptr<TermTransformer> term_transformer,
int max_term_byte_size);
// Helper method to normalize Latin terms only. Rules applied:
// 1. Uppercase to lowercase
// 2. Remove diacritic (accent) marks
- std::string NormalizeLatin(const UNormalizer2* normalizer2,
- std::string_view term) const;
+ NormalizeLatinResult NormalizeLatin(const UNormalizer2* normalizer2,
+ std::string_view term) const;
+
+ // Advances char_itr and normalized_char_itr past the segments of term and
+ // normalized_term that match once term is normalized into ASCII, and returns
+ // true.
+ //
+ // Stops at the first char of term that cannot be normalized into ASCII (or
+ // immediately if normalizer2 is null) and returns false, so that
+ // term_transformer can handle the remaining portion.
+ bool FindNormalizedLatinMatchEndPosition(
+ const UNormalizer2* normalizer2, std::string_view term,
+ CharacterIterator& char_itr, std::string_view normalized_term,
+ CharacterIterator& normalized_char_itr) const;
// Used to transform terms into their normalized forms.
std::unique_ptr<TermTransformer> term_transformer_;
diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc
index 143da17..719f7be 100644
--- a/icing/transform/icu/icu-normalizer_test.cc
+++ b/icing/transform/icu/icu-normalizer_test.cc
@@ -111,6 +111,7 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) {
EXPECT_THAT(normalizer_->NormalizeTerm("ÝŶŸẎẏŷýÿ"), Eq("yyyyyyyy"));
EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"),
Eq("zzzzzzzzzzzz"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Barış"), Eq("baris"));
}
// Accent / diacritic marks won't be removed in non-latin chars, e.g. in
@@ -278,6 +279,14 @@ TEST_F(IcuNormalizerTest, PrefixMatchLength) {
term = "Buenos días";
match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di");
EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí"));
+
+ term = "BarışIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "baris");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
+
+ term = "ÀĄḁáIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "aaaa");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ÀĄḁá"));
}
TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
@@ -327,6 +336,10 @@ TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) {
term = "días";
match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond");
EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día"));
+
+ term = "BarışIcing";
+ match_end = normalizer->FindNormalizedMatchEndPosition(term, "barismdi");
+ EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış"));
}
} // namespace