From 5a41ca990be33387b0d5d15836a465bbe5ff5a28 Mon Sep 17 00:00:00 2001 From: Alex Saveliev Date: Tue, 7 Feb 2023 20:02:36 -0800 Subject: Update icing from upstream ====================================================================== Adds a proto change for the delete propagation option ====================================================================== [ez] Change version to magic for PersistentHashMap ====================================================================== [iOS][testing][nitro] Disabling ICU language segmenter_test. ====================================================================== 1. Add support for segmentation in the QueryVisitor. ====================================================================== Support the new double list type in ScoreExpression ====================================================================== Pass JoinChildrenFetcher from IcingSearchEngine all the way down to ScoringVisitor ====================================================================== Refactor the logic of Icing Joins so that nested search and scoring will be performed before the parent ====================================================================== Add lite-index thread-safety tests. ====================================================================== Split IcingSearchEngineTest into separate tests to cover specific apis: ====================================================================== Fix index tests TearDown method. ====================================================================== Improve query concurrency by providing a finer-grained lock around the LiteIndex. 
====================================================================== Fix Icing normalization bug ====================================================================== [ez] Fix integer overflow error for IntegerIndexStorage ====================================================================== [NumericSearch][Storage][11/x] Implement Reset and destructor for IntegerIndex ====================================================================== [NumericSearch][Storage][10/x] Add class IntegerIndex ====================================================================== Refactor NumericIndex based on PersistentStorage ====================================================================== Refactor IntegerIndexStorage based on PersistentStorage ====================================================================== Add "working_path" into PersistentStorage ====================================================================== Refactor PersistentHashMap based on PersistentStorage ====================================================================== Create virtual class PersistentStorage for refactoring ====================================================================== Avoids returning reference to local temporary object. 
====================================================================== LSC: Clean up references to the legacy protobuf compat library ====================================================================== Fix time complexity regression for snippet retriever Bug: 256022027 Bug: 193919210 Bug: 266132035 Bug: 208654892 Bug: 261474063 Bug: 266103594 Bug: 146008613 Bug: 253182853 Bug: 266204868 Bug: 249829533 Bug: 266665956 Bug: 265258364 Change-Id: Ib2398c5097b6a2a57900e2ad4e3737502aa13820 --- icing/file/file-backed-proto-log.h | 6 +- icing/file/file-backed-proto.h | 4 +- icing/file/filesystem.cc | 3 + icing/file/persistent-hash-map.cc | 458 +- icing/file/persistent-hash-map.h | 174 +- icing/file/persistent-hash-map_test.cc | 353 +- icing/file/persistent-storage.cc | 55 + icing/file/persistent-storage.h | 338 + icing/file/portable-file-backed-proto-log.h | 5 +- icing/icing-search-engine.cc | 81 +- icing/icing-search-engine.h | 10 +- icing/icing-search-engine_delete_test.cc | 768 ++ icing/icing-search-engine_initialization_test.cc | 1920 ++++ icing/icing-search-engine_optimize_test.cc | 974 ++ icing/icing-search-engine_put_test.cc | 481 + icing/icing-search-engine_schema_test.cc | 1698 +++ icing/icing-search-engine_search_test.cc | 4143 +++++++ icing/icing-search-engine_suggest_test.cc | 1304 +++ icing/icing-search-engine_test.cc | 11015 +------------------ icing/index/index-processor_benchmark.cc | 118 +- icing/index/index-processor_test.cc | 35 +- icing/index/index_test.cc | 1 + .../index/lite/doc-hit-info-iterator-term-lite.cc | 4 +- icing/index/lite/lite-index.cc | 59 +- icing/index/lite/lite-index.h | 144 +- icing/index/lite/lite-index_test.cc | 6 +- icing/index/lite/lite-index_thread-safety_test.cc | 400 + icing/index/main/main-index_test.cc | 2 + icing/index/numeric/dummy-numeric-index.h | 69 +- icing/index/numeric/integer-index-storage.cc | 292 +- icing/index/numeric/integer-index-storage.h | 195 +- icing/index/numeric/integer-index-storage_test.cc | 205 +- 
icing/index/numeric/integer-index.cc | 242 + icing/index/numeric/integer-index.h | 190 + icing/index/numeric/numeric-index.h | 40 +- icing/index/numeric/numeric-index_test.cc | 23 +- icing/index/string-section-indexing-handler.cc | 2 +- icing/jni/icing-search-engine-jni.cc | 6 +- icing/join/join-children-fetcher.cc | 39 + icing/join/join-children-fetcher.h | 73 + icing/join/join-children-fetcher_test.cc | 82 + icing/join/join-processor.cc | 47 +- icing/join/join-processor.h | 14 +- icing/join/join-processor_test.cc | 49 +- icing/portable/equals-proto.h | 2 +- icing/portable/gzip_stream.h | 20 +- icing/query/advanced_query_parser/query-visitor.cc | 168 +- icing/query/advanced_query_parser/query-visitor.h | 90 +- .../advanced_query_parser/query-visitor_test.cc | 333 +- icing/query/query-processor.cc | 26 +- icing/query/query-processor_benchmark.cc | 24 +- icing/query/query-processor_test.cc | 36 +- icing/scoring/advanced_scoring/advanced-scorer.cc | 10 +- icing/scoring/advanced_scoring/advanced-scorer.h | 4 +- icing/scoring/advanced_scoring/score-expression.cc | 85 +- icing/scoring/advanced_scoring/score-expression.h | 80 +- .../advanced_scoring/score-expression_test.cc | 104 +- icing/scoring/advanced_scoring/scoring-visitor.h | 9 +- icing/scoring/scorer-factory.cc | 5 +- icing/scoring/scorer-factory.h | 4 +- icing/scoring/scoring-processor.cc | 13 +- icing/scoring/scoring-processor.h | 4 +- icing/store/document-store.cc | 11 +- icing/store/document-store.h | 4 +- icing/store/document-store_test.cc | 4 +- icing/store/key-mapper_benchmark.cc | 9 +- icing/store/key-mapper_test.cc | 22 +- icing/store/persistent-hash-map-key-mapper.h | 54 +- icing/transform/icu/icu-normalizer.cc | 111 +- icing/transform/icu/icu-normalizer.h | 26 +- icing/transform/icu/icu-normalizer_test.cc | 13 + 71 files changed, 15487 insertions(+), 11911 deletions(-) create mode 100644 icing/file/persistent-storage.cc create mode 100644 icing/file/persistent-storage.h create mode 100644 
icing/icing-search-engine_delete_test.cc create mode 100644 icing/icing-search-engine_initialization_test.cc create mode 100644 icing/icing-search-engine_optimize_test.cc create mode 100644 icing/icing-search-engine_put_test.cc create mode 100644 icing/icing-search-engine_schema_test.cc create mode 100644 icing/icing-search-engine_search_test.cc create mode 100644 icing/icing-search-engine_suggest_test.cc create mode 100644 icing/index/lite/lite-index_thread-safety_test.cc create mode 100644 icing/index/numeric/integer-index.cc create mode 100644 icing/index/numeric/integer-index.h create mode 100644 icing/join/join-children-fetcher.cc create mode 100644 icing/join/join-children-fetcher.h create mode 100644 icing/join/join-children-fetcher_test.cc (limited to 'icing') diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index 78236ba..095f832 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -40,7 +40,6 @@ #include #include "icing/text_classifier/lib3/utils/base/statusor.h" -#include #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/file/filesystem.h" @@ -53,6 +52,7 @@ #include "icing/util/data-loss.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" +#include namespace icing { namespace lib { @@ -575,8 +575,8 @@ libtextclassifier3::StatusOr FileBackedProtoLog::ReadProto( return absl_ports::NotFoundError("The proto data has been erased."); } - google::protobuf::io::ArrayInputStream proto_stream( - mmapped_file.mutable_region(), stored_size); + google::protobuf::io::ArrayInputStream proto_stream(mmapped_file.mutable_region(), + stored_size); // Deserialize proto ProtoT proto; diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h index d7d9bad..8deb7a6 100644 --- a/icing/file/file-backed-proto.h +++ b/icing/file/file-backed-proto.h @@ -66,10 +66,10 @@ class FileBackedProto { // Reset the internal 
file_path for the file backed proto. // Example use: // auto file_backed_proto1 = *FileBackedProto::Create(...); - // auto file_backed_proto2 = *FileBackedProto::Create(...); + // auto file_backed_google::protobuf = *FileBackedProto::Create(...); // filesystem.SwapFiles(file1, file2); // file_backed_proto1.SetSwappedFilepath(file2); - // file_backed_proto2.SetSwappedFilepath(file1); + // file_backed_google::protobuf.SetSwappedFilepath(file1); void SetSwappedFilepath(std::string_view swapped_to_file_path) { file_path_ = swapped_to_file_path; } diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc index 10b77db..c83a351 100644 --- a/icing/file/filesystem.cc +++ b/icing/file/filesystem.cc @@ -127,6 +127,9 @@ bool ListDirectoryInternal(const char* dir_name, return false; } + // According to linux man page + // (https://man7.org/linux/man-pages/man3/readdir.3.html#RETURN_VALUE), dirent + // may be statically allocated, so don't free it. dirent* p; // readdir's implementation seems to be thread safe. while ((p = readdir(dir)) != nullptr) { diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc index 0af5e2f..14a1251 100644 --- a/icing/file/persistent-hash-map.cc +++ b/icing/file/persistent-hash-map.cc @@ -58,125 +58,26 @@ libtextclassifier3::StatusOr HashKeyToBucketIndex( return static_cast(std::hash()(key) % num_buckets); } -// Helper function to PWrite crcs and info to metadata_file_path. Note that -// metadata_file_path will be the normal or temporary (for branching use when -// rehashing) metadata file path. -libtextclassifier3::Status WriteMetadata(const Filesystem& filesystem, - const char* metadata_file_path, - const PersistentHashMap::Crcs* crcs, - const PersistentHashMap::Info* info) { - ScopedFd sfd(filesystem.OpenForWrite(metadata_file_path)); - if (!sfd.is_valid()) { - return absl_ports::InternalError("Failed to create metadata file"); - } - - // Write crcs and info. 
File layout: - if (!filesystem.PWrite(sfd.get(), PersistentHashMap::Crcs::kFileOffset, crcs, - sizeof(PersistentHashMap::Crcs))) { - return absl_ports::InternalError("Failed to write crcs into metadata file"); - } - // Note that PWrite won't change the file offset, so we need to specify - // the correct offset when writing Info. - if (!filesystem.PWrite(sfd.get(), PersistentHashMap::Info::kFileOffset, info, - sizeof(PersistentHashMap::Info))) { - return absl_ports::InternalError("Failed to write info into metadata file"); - } - - return libtextclassifier3::Status::OK; -} - -// Helper function to update checksums from info and storages to a Crcs -// instance. Note that storages will be the normal instances used by -// PersistentHashMap, or the temporary instances (for branching use when -// rehashing). -libtextclassifier3::Status UpdateChecksums( - PersistentHashMap::Crcs* crcs, PersistentHashMap::Info* info, - FileBackedVector* bucket_storage, - FileBackedVector* entry_storage, - FileBackedVector* kv_storage) { - // Compute crcs - ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc, - bucket_storage->ComputeChecksum()); - ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc, - entry_storage->ComputeChecksum()); - ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage->ComputeChecksum()); - - crcs->component_crcs.info_crc = info->ComputeChecksum().Get(); - crcs->component_crcs.bucket_storage_crc = bucket_storage_crc.Get(); - crcs->component_crcs.entry_storage_crc = entry_storage_crc.Get(); - crcs->component_crcs.kv_storage_crc = kv_storage_crc.Get(); - crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get(); - - return libtextclassifier3::Status::OK; -} - -// Helper function to validate checksums. 
-libtextclassifier3::Status ValidateChecksums( - const PersistentHashMap::Crcs* crcs, const PersistentHashMap::Info* info, - FileBackedVector* bucket_storage, - FileBackedVector* entry_storage, - FileBackedVector* kv_storage) { - if (crcs->all_crc != crcs->component_crcs.ComputeChecksum().Get()) { - return absl_ports::FailedPreconditionError( - "Invalid all crc for PersistentHashMap"); - } - - if (crcs->component_crcs.info_crc != info->ComputeChecksum().Get()) { - return absl_ports::FailedPreconditionError( - "Invalid info crc for PersistentHashMap"); - } - - ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc, - bucket_storage->ComputeChecksum()); - if (crcs->component_crcs.bucket_storage_crc != bucket_storage_crc.Get()) { - return absl_ports::FailedPreconditionError( - "Mismatch crc with PersistentHashMap bucket storage"); - } - - ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc, - entry_storage->ComputeChecksum()); - if (crcs->component_crcs.entry_storage_crc != entry_storage_crc.Get()) { - return absl_ports::FailedPreconditionError( - "Mismatch crc with PersistentHashMap entry storage"); - } - - ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage->ComputeChecksum()); - if (crcs->component_crcs.kv_storage_crc != kv_storage_crc.Get()) { - return absl_ports::FailedPreconditionError( - "Mismatch crc with PersistentHashMap key value storage"); - } - - return libtextclassifier3::Status::OK; -} - -// Since metadata/bucket/entry storages should be branched when rehashing, we -// have to store them together under the same sub directory -// ("/"). On the other hand, key-value storage won't be -// branched and it will be stored under . -// // The following 4 methods are helper functions to get the correct path of -// metadata/bucket/entry/key-value storages, according to the given base -// directory and sub directory. 
-std::string GetMetadataFilePath(std::string_view base_dir, - std::string_view sub_dir) { - return absl_ports::StrCat(base_dir, "/", sub_dir, "/", - PersistentHashMap::kFilePrefix, ".m"); +// metadata/bucket/entry/key-value storages, according to the given working +// directory path. +std::string GetMetadataFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix, + ".m"); } -std::string GetBucketStorageFilePath(std::string_view base_dir, - std::string_view sub_dir) { - return absl_ports::StrCat(base_dir, "/", sub_dir, "/", - PersistentHashMap::kFilePrefix, ".b"); +std::string GetBucketStorageFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix, + ".b"); } -std::string GetEntryStorageFilePath(std::string_view base_dir, - std::string_view sub_dir) { - return absl_ports::StrCat(base_dir, "/", sub_dir, "/", - PersistentHashMap::kFilePrefix, ".e"); +std::string GetEntryStorageFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix, + ".e"); } -std::string GetKeyValueStorageFilePath(std::string_view base_dir) { - return absl_ports::StrCat(base_dir, "/", PersistentHashMap::kFilePrefix, +std::string GetKeyValueStorageFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", PersistentHashMap::kFilePrefix, ".k"); } @@ -234,29 +135,31 @@ bool PersistentHashMap::Options::IsValid() const { /* static */ libtextclassifier3::StatusOr> PersistentHashMap::Create(const Filesystem& filesystem, - std::string_view base_dir, const Options& options) { + std::string working_path, Options options) { if (!options.IsValid()) { return absl_ports::InvalidArgumentError( "Invalid PersistentHashMap options"); } - if (!filesystem.FileExists( - GetMetadataFilePath(base_dir, kSubDirectory).c_str()) || + if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) || + 
!filesystem.FileExists(GetBucketStorageFilePath(working_path).c_str()) || + !filesystem.FileExists(GetEntryStorageFilePath(working_path).c_str()) || !filesystem.FileExists( - GetBucketStorageFilePath(base_dir, kSubDirectory).c_str()) || - !filesystem.FileExists( - GetEntryStorageFilePath(base_dir, kSubDirectory).c_str()) || - !filesystem.FileExists(GetKeyValueStorageFilePath(base_dir).c_str())) { - // TODO: erase all files if missing any. - return InitializeNewFiles(filesystem, base_dir, options); - } - return InitializeExistingFiles(filesystem, base_dir, options); + GetKeyValueStorageFilePath(working_path).c_str())) { + // Discard working_path if any of them is missing, and reinitialize. + ICING_RETURN_IF_ERROR(Discard(filesystem, working_path)); + return InitializeNewFiles(filesystem, std::move(working_path), + std::move(options)); + } + return InitializeExistingFiles(filesystem, std::move(working_path), + std::move(options)); } PersistentHashMap::~PersistentHashMap() { if (!PersistToDisk().ok()) { ICING_LOG(WARNING) - << "Failed to persist hash map to disk while destructing " << base_dir_; + << "Failed to persist hash map to disk while destructing " + << working_path_; } } @@ -278,7 +181,7 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key, ICING_ASSIGN_OR_RETURN(const Entry* entry, entry_storage_->Get(idx_pair.target_entry_index)); - int32_t kv_len = key.length() + 1 + info()->value_type_size; + int32_t kv_len = key.length() + 1 + info().value_type_size; int32_t value_offset = key.length() + 1; ICING_ASSIGN_OR_RETURN( typename FileBackedVector::MutableArrayView mutable_kv_arr, @@ -286,7 +189,7 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key, // It is the same key and value_size is fixed, so we can directly overwrite // serialized value. 
mutable_kv_arr.SetArray(value_offset, reinterpret_cast(value), - info()->value_type_size); + info().value_type_size); return libtextclassifier3::Status::OK; } @@ -319,8 +222,8 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key, ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); if (idx_pair.target_entry_index == Entry::kInvalidIndex) { - return absl_ports::NotFoundError( - absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_)); + return absl_ports::NotFoundError(absl_ports::StrCat( + "Key not found in PersistentHashMap ", working_path_)); } return CopyEntryValue(idx_pair.target_entry_index, value); @@ -335,8 +238,8 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) { ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); if (idx_pair.target_entry_index == Entry::kInvalidIndex) { - return absl_ports::NotFoundError( - absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_)); + return absl_ports::NotFoundError(absl_ports::StrCat( + "Key not found in PersistentHashMap ", working_path_)); } ICING_ASSIGN_OR_RETURN( @@ -375,7 +278,7 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) { // Zero out the key value bytes. It is necessary for iterator to iterate // through kv_storage and handle deleted keys properly. 
- int32_t kv_len = key.length() + 1 + info()->value_type_size; + int32_t kv_len = key.length() + 1 + info().value_type_size; ICING_RETURN_IF_ERROR(kv_storage_->Set( mutable_target_entry.Get().key_value_index(), kv_len, '\0')); @@ -383,23 +286,7 @@ libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) { mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex); mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex); - ++(info()->num_deleted_entries); - - return libtextclassifier3::Status::OK; -} - -libtextclassifier3::Status PersistentHashMap::PersistToDisk() { - ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk()); - ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk()); - ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk()); - - ICING_RETURN_IF_ERROR(UpdateChecksums(crcs(), info(), bucket_storage_.get(), - entry_storage_.get(), - kv_storage_.get())); - // Changes should have been applied to the underlying file when using - // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an - // extra safety step to ensure they are written out. 
- ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk()); + ++(info().num_deleted_entries); return libtextclassifier3::Status::OK; } @@ -415,8 +302,7 @@ libtextclassifier3::StatusOr PersistentHashMap::GetDiskUsage() const { int64_t total = bucket_storage_disk_usage + entry_storage_disk_usage + kv_storage_disk_usage; Filesystem::IncrementByOrSetInvalid( - filesystem_->GetDiskUsage( - GetMetadataFilePath(base_dir_, kSubDirectory).c_str()), + filesystem_.GetDiskUsage(GetMetadataFilePath(working_path_).c_str()), &total); if (total < 0 || total == Filesystem::kBadFileSize) { @@ -438,23 +324,15 @@ libtextclassifier3::StatusOr PersistentHashMap::GetElementsSize() kv_storage_elements_size; } -libtextclassifier3::StatusOr PersistentHashMap::ComputeChecksum() { - Crcs* crcs_ptr = crcs(); - ICING_RETURN_IF_ERROR(UpdateChecksums(crcs_ptr, info(), bucket_storage_.get(), - entry_storage_.get(), - kv_storage_.get())); - return Crc32(crcs_ptr->all_crc); -} - /* static */ libtextclassifier3::StatusOr> PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem, - std::string_view base_dir, - const Options& options) { - // Create directory. - const std::string dir_path = absl_ports::StrCat(base_dir, "/", kSubDirectory); - if (!filesystem.CreateDirectoryRecursively(dir_path.c_str())) { + std::string&& working_path, + Options&& options) { + // PersistentHashMap uses working_path as working directory path. + // Create working directory. 
+ if (!filesystem.CreateDirectory(working_path.c_str())) { return absl_ports::InternalError( - absl_ports::StrCat("Failed to create directory: ", dir_path)); + absl_ports::StrCat("Failed to create directory: ", working_path)); } int32_t max_num_buckets_required = @@ -469,7 +347,7 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem, ICING_ASSIGN_OR_RETURN( std::unique_ptr> bucket_storage, FileBackedVector::Create( - filesystem, GetBucketStorageFilePath(base_dir, kSubDirectory), + filesystem, GetBucketStorageFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, pre_mapping_mmap_size)); @@ -477,75 +355,77 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem, pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries; max_file_size = pre_mapping_mmap_size + FileBackedVector::Header::kHeaderSize; - ICING_ASSIGN_OR_RETURN( - std::unique_ptr> entry_storage, - FileBackedVector::Create( - filesystem, GetEntryStorageFilePath(base_dir, kSubDirectory), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, - pre_mapping_mmap_size)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr> entry_storage, + FileBackedVector::Create( + filesystem, GetEntryStorageFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, pre_mapping_mmap_size)); // Initialize kv_storage pre_mapping_mmap_size = options.average_kv_byte_size * options.max_num_entries; max_file_size = pre_mapping_mmap_size + FileBackedVector::Header::kHeaderSize; - ICING_ASSIGN_OR_RETURN(std::unique_ptr> kv_storage, - FileBackedVector::Create( - filesystem, GetKeyValueStorageFilePath(base_dir), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - max_file_size, pre_mapping_mmap_size)); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr> kv_storage, + FileBackedVector::Create( + filesystem, GetKeyValueStorageFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, + 
pre_mapping_mmap_size)); // Initialize buckets. ICING_RETURN_IF_ERROR(bucket_storage->Set( /*idx=*/0, /*len=*/options.init_num_buckets, Bucket())); ICING_RETURN_IF_ERROR(bucket_storage->PersistToDisk()); - // Create and initialize new info - Info new_info; - new_info.version = kVersion; - new_info.value_type_size = options.value_type_size; - new_info.max_load_factor_percent = options.max_load_factor_percent; - new_info.num_deleted_entries = 0; - new_info.num_deleted_key_value_bytes = 0; - - // Compute checksums - Crcs new_crcs; - ICING_RETURN_IF_ERROR(UpdateChecksums(&new_crcs, &new_info, - bucket_storage.get(), - entry_storage.get(), kv_storage.get())); - - const std::string metadata_file_path = - GetMetadataFilePath(base_dir, kSubDirectory); - // Write new metadata file - ICING_RETURN_IF_ERROR(WriteMetadata(filesystem, metadata_file_path.c_str(), - &new_crcs, &new_info)); - - // Mmap the content of the crcs and info. - ICING_ASSIGN_OR_RETURN(MemoryMappedFile metadata_mmapped_file, - MemoryMappedFile::Create( - filesystem, metadata_file_path, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap( - /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info))); - - return std::unique_ptr(new PersistentHashMap( - filesystem, base_dir, options, std::move(metadata_mmapped_file), - std::move(bucket_storage), std::move(entry_storage), - std::move(kv_storage))); + // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and + // call GrowAndRemapIfNecessary to grow the underlying file. 
+ ICING_ASSIGN_OR_RETURN( + MemoryMappedFile metadata_mmapped_file, + MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/kMetadataFileSize, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/kMetadataFileSize)); + ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary( + /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize)); + + // Create instance. + auto new_persistent_hash_map = + std::unique_ptr(new PersistentHashMap( + filesystem, std::move(working_path), std::move(options), + std::move(metadata_mmapped_file), std::move(bucket_storage), + std::move(entry_storage), std::move(kv_storage))); + // Initialize info content by writing mapped memory directly. + Info& info_ref = new_persistent_hash_map->info(); + info_ref.magic = Info::kMagic; + info_ref.value_type_size = new_persistent_hash_map->options_.value_type_size; + info_ref.max_load_factor_percent = + new_persistent_hash_map->options_.max_load_factor_percent; + info_ref.num_deleted_entries = 0; + info_ref.num_deleted_key_value_bytes = 0; + // Initialize new PersistentStorage. The initial checksums will be computed + // and set via InitializeNewStorage. + ICING_RETURN_IF_ERROR(new_persistent_hash_map->InitializeNewStorage()); + + return new_persistent_hash_map; } /* static */ libtextclassifier3::StatusOr> PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, - std::string_view base_dir, - const Options& options) { - // Mmap the content of the crcs and info. 
+ std::string&& working_path, + Options&& options) { + // Initialize metadata file ICING_ASSIGN_OR_RETURN( MemoryMappedFile metadata_mmapped_file, - MemoryMappedFile::Create( - filesystem, GetMetadataFilePath(base_dir, kSubDirectory), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap( - /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info))); + MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/kMetadataFileSize, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/kMetadataFileSize)); + if (metadata_mmapped_file.available_size() != kMetadataFileSize) { + return absl_ports::FailedPreconditionError("Incorrect metadata file size"); + } int32_t max_num_buckets_required = CalculateNumBucketsRequired( options.max_num_entries, options.max_load_factor_percent); @@ -557,7 +437,7 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, ICING_ASSIGN_OR_RETURN( std::unique_ptr> bucket_storage, FileBackedVector::Create( - filesystem, GetBucketStorageFilePath(base_dir, kSubDirectory), + filesystem, GetBucketStorageFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, pre_mapping_mmap_size)); @@ -565,77 +445,113 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries; max_file_size = pre_mapping_mmap_size + FileBackedVector::Header::kHeaderSize; - ICING_ASSIGN_OR_RETURN( - std::unique_ptr> entry_storage, - FileBackedVector::Create( - filesystem, GetEntryStorageFilePath(base_dir, kSubDirectory), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, - pre_mapping_mmap_size)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr> entry_storage, + FileBackedVector::Create( + filesystem, GetEntryStorageFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + 
max_file_size, pre_mapping_mmap_size)); // Initialize kv_storage pre_mapping_mmap_size = options.average_kv_byte_size * options.max_num_entries; max_file_size = pre_mapping_mmap_size + FileBackedVector::Header::kHeaderSize; - ICING_ASSIGN_OR_RETURN(std::unique_ptr> kv_storage, - FileBackedVector::Create( - filesystem, GetKeyValueStorageFilePath(base_dir), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - max_file_size, pre_mapping_mmap_size)); - - Crcs* crcs_ptr = reinterpret_cast( - metadata_mmapped_file.mutable_region() + Crcs::kFileOffset); - Info* info_ptr = reinterpret_cast( - metadata_mmapped_file.mutable_region() + Info::kFileOffset); - - // Value type size should be consistent. - if (options.value_type_size != info_ptr->value_type_size) { - return absl_ports::FailedPreconditionError("Incorrect value type size"); - } + ICING_ASSIGN_OR_RETURN( + std::unique_ptr> kv_storage, + FileBackedVector::Create( + filesystem, GetKeyValueStorageFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, + pre_mapping_mmap_size)); - // Current # of entries should not exceed options.max_num_entries - // We compute max_file_size of 3 storages by options.max_num_entries. Since we - // won't recycle space of deleted entries (and key-value bytes), they're still - // occupying space in storages. Even if # of "active" entries doesn't exceed - // options.max_num_entries, the new kvp to be inserted still potentially - // exceeds max_file_size. - // Therefore, we should use entry_storage->num_elements() instead of # of + // Create instance. + auto persistent_hash_map = + std::unique_ptr(new PersistentHashMap( + filesystem, std::move(working_path), std::move(options), + std::move(metadata_mmapped_file), std::move(bucket_storage), + std::move(entry_storage), std::move(kv_storage))); + // Initialize existing PersistentStorage. Checksums will be validated. 
+ ICING_RETURN_IF_ERROR(persistent_hash_map->InitializeExistingStorage()); + + // Validate other values of info and options. + // Current # of entries should not exceed options_.max_num_entries + // We compute max_file_size of 3 storages by options_.max_num_entries. Since + // we won't recycle space of deleted entries (and key-value bytes), they're + // still occupying space in storages. Even if # of "active" entries doesn't + // exceed options_.max_num_entries, the new kvp to be inserted still + // potentially exceeds max_file_size. + // Therefore, we should use entry_storage_->num_elements() instead of # of // "active" entries - // (i.e. entry_storage->num_elements() - info_ptr->num_deleted_entries) to + // (i.e. entry_storage_->num_elements() - info_ptr->num_deleted_entries) to // check. This feature avoids storages being grown extremely large when there // are many Delete() and Put() operations. - if (entry_storage->num_elements() > options.max_num_entries) { + if (persistent_hash_map->entry_storage_->num_elements() > + persistent_hash_map->options_.max_num_entries) { return absl_ports::FailedPreconditionError( "Current # of entries exceeds max num entries"); } - // Validate checksums of info and 3 storages. - ICING_RETURN_IF_ERROR( - ValidateChecksums(crcs_ptr, info_ptr, bucket_storage.get(), - entry_storage.get(), kv_storage.get())); + // Magic should be the same. + if (persistent_hash_map->info().magic != Info::kMagic) { + return absl_ports::FailedPreconditionError( + "PersistentHashMap header magic mismatch"); + } + + // Value type size should be consistent. + if (persistent_hash_map->options_.value_type_size != + persistent_hash_map->info().value_type_size) { + return absl_ports::FailedPreconditionError("Incorrect value type size"); + } // Allow max_load_factor_percent_ change. 
- if (options.max_load_factor_percent != info_ptr->max_load_factor_percent) { + if (persistent_hash_map->options_.max_load_factor_percent != + persistent_hash_map->info().max_load_factor_percent) { ICING_VLOG(2) << "Changing max_load_factor_percent from " - << info_ptr->max_load_factor_percent << " to " - << options.max_load_factor_percent; + << persistent_hash_map->info().max_load_factor_percent + << " to " + << persistent_hash_map->options_.max_load_factor_percent; + + persistent_hash_map->info().max_load_factor_percent = + persistent_hash_map->options_.max_load_factor_percent; + ICING_RETURN_IF_ERROR( + persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false)); - info_ptr->max_load_factor_percent = options.max_load_factor_percent; - crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get(); - crcs_ptr->all_crc = crcs_ptr->component_crcs.ComputeChecksum().Get(); - ICING_RETURN_IF_ERROR(metadata_mmapped_file.PersistToDisk()); + ICING_RETURN_IF_ERROR(persistent_hash_map->PersistToDisk()); } - auto persistent_hash_map = - std::unique_ptr(new PersistentHashMap( - filesystem, base_dir, options, std::move(metadata_mmapped_file), - std::move(bucket_storage), std::move(entry_storage), - std::move(kv_storage))); - ICING_RETURN_IF_ERROR( - persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false)); return persistent_hash_map; } +libtextclassifier3::Status PersistentHashMap::PersistStoragesToDisk() { + ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk()); + ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk()); + ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk()); + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status PersistentHashMap::PersistMetadataToDisk() { + // Changes should have been applied to the underlying file when using + // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an + // extra safety step to ensure they are written out. 
+ return metadata_mmapped_file_->PersistToDisk(); +} + +libtextclassifier3::StatusOr PersistentHashMap::ComputeInfoChecksum() { + return info().ComputeChecksum(); +} + +libtextclassifier3::StatusOr +PersistentHashMap::ComputeStoragesChecksum() { + // Compute crcs + ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc, + bucket_storage_->ComputeChecksum()); + ICING_ASSIGN_OR_RETURN(Crc32 entry_storage_crc, + entry_storage_->ComputeChecksum()); + ICING_ASSIGN_OR_RETURN(Crc32 kv_storage_crc, kv_storage_->ComputeChecksum()); + + return Crc32(bucket_storage_crc.Get() ^ entry_storage_crc.Get() ^ + kv_storage_crc.Get()); +} + libtextclassifier3::StatusOr PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx, std::string_view key) const { @@ -674,7 +590,7 @@ libtextclassifier3::Status PersistentHashMap::CopyEntryValue( ICING_ASSIGN_OR_RETURN(const char* kv_arr, kv_storage_->Get(entry->key_value_index())); int32_t value_offset = strlen(kv_arr) + 1; - memcpy(value, kv_arr + value_offset, info()->value_type_size); + memcpy(value, kv_arr + value_offset, info().value_type_size); return libtextclassifier3::Status::OK; } @@ -702,7 +618,7 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx, // Append new key value. int32_t new_kv_idx = kv_storage_->num_elements(); - int32_t kv_len = key.size() + 1 + info()->value_type_size; + int32_t kv_len = key.size() + 1 + info().value_type_size; int32_t value_offset = key.size() + 1; ICING_ASSIGN_OR_RETURN( typename FileBackedVector::MutableArrayView mutable_new_kv_arr, @@ -711,7 +627,7 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx, mutable_new_kv_arr.SetArray(/*idx=*/key.size(), "\0", 1); mutable_new_kv_arr.SetArray(/*idx=*/value_offset, reinterpret_cast(value), - info()->value_type_size); + info().value_type_size); // Append new entry. 
int32_t new_entry_idx = entry_storage_->num_elements(); @@ -727,7 +643,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary( int32_t new_num_bucket = bucket_storage_->num_elements(); while (new_num_bucket <= Bucket::kMaxNumBuckets / 2 && size() > static_cast(new_num_bucket) * - info()->max_load_factor_percent / 100) { + info().max_load_factor_percent / 100) { new_num_bucket *= 2; } @@ -764,7 +680,7 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary( entry_storage_->TruncateTo(entry_idx); } - info()->num_deleted_entries = 0; + info().num_deleted_entries = 0; return libtextclassifier3::Status::OK; } @@ -774,7 +690,7 @@ bool PersistentHashMap::Iterator::Advance() { // key value pair. In the first round (after construction), curr_key_len_ // is 0, so don't jump over anything. if (curr_key_len_ != 0) { - curr_kv_idx_ += curr_key_len_ + 1 + map_->info()->value_type_size; + curr_kv_idx_ += curr_key_len_ + 1 + map_->info().value_type_size; curr_key_len_ = 0; } diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h index 57fa070..a6d14bb 100644 --- a/icing/file/persistent-hash-map.h +++ b/icing/file/persistent-hash-map.h @@ -24,6 +24,7 @@ #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" +#include "icing/file/persistent-storage.h" #include "icing/util/crc32.h" namespace icing { @@ -34,7 +35,7 @@ namespace lib { // Key and value can be any type, but callers should serialize key/value by // themselves and pass raw bytes into the hash map, and the serialized key // should not contain termination character '\0'. -class PersistentHashMap { +class PersistentHashMap : public PersistentStorage { public: // For iterating through persistent hash map. The order is not guaranteed. // @@ -80,45 +81,15 @@ class PersistentHashMap { friend class PersistentHashMap; }; - // Crcs and Info will be written into the metadata file. 
- // File layout: - // Crcs - struct Crcs { - static constexpr int32_t kFileOffset = 0; - - struct ComponentCrcs { - uint32_t info_crc; - uint32_t bucket_storage_crc; - uint32_t entry_storage_crc; - uint32_t kv_storage_crc; - - bool operator==(const ComponentCrcs& other) const { - return info_crc == other.info_crc && - bucket_storage_crc == other.bucket_storage_crc && - entry_storage_crc == other.entry_storage_crc && - kv_storage_crc == other.kv_storage_crc; - } - - Crc32 ComputeChecksum() const { - return Crc32(std::string_view(reinterpret_cast(this), - sizeof(ComponentCrcs))); - } - } __attribute__((packed)); - - bool operator==(const Crcs& other) const { - return all_crc == other.all_crc && component_crcs == other.component_crcs; - } - - uint32_t all_crc; - ComponentCrcs component_crcs; - } __attribute__((packed)); - static_assert(sizeof(Crcs) == 20, ""); + // Metadata file layout: + static constexpr int32_t kCrcsMetadataFileOffset = 0; + static constexpr int32_t kInfoMetadataFileOffset = + static_cast(sizeof(Crcs)); - // Info struct Info { - static constexpr int32_t kFileOffset = static_cast(sizeof(Crcs)); + static constexpr int32_t kMagic = 0x653afd7b; - int32_t version; + int32_t magic; int32_t value_type_size; int32_t max_load_factor_percent; int32_t num_deleted_entries; @@ -131,6 +102,9 @@ class PersistentHashMap { } __attribute__((packed)); static_assert(sizeof(Info) == 20, ""); + static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info); + static_assert(kMetadataFileSize == 32, ""); + // Bucket class Bucket { public: @@ -270,33 +244,48 @@ class PersistentHashMap { int32_t init_num_buckets; }; - static constexpr int32_t kVersion = 1; - + static constexpr WorkingPathType kWorkingPathType = + WorkingPathType::kDirectory; static constexpr std::string_view kFilePrefix = "persistent_hash_map"; - // Only metadata, bucket, entry files are stored under this sub-directory, for - // rehashing branching use. 
- static constexpr std::string_view kSubDirectory = "dynamic"; // Creates a new PersistentHashMap to read/write/delete key value pairs. // // filesystem: Object to make system level calls - // base_dir: Specifies the directory for all persistent hash map related - // sub-directory and files to be stored. If base_dir doesn't exist, - // then PersistentHashMap will automatically create it. If files - // exist, then it will initialize the hash map from existing files. + // working_path: Specifies the working path for PersistentStorage. + // PersistentHashMap uses working path as working directory and + // all related files will be stored under this directory. It + // takes full ownership of working_path_, including + // creation/deletion. It is the caller's responsibility to + // specify correct working path and avoid mixing different + // persistent storages together under the same path. Also the + // caller has the ownership for the parent directory of + // working_path_, and it is responsible for parent directory + // creation/deletion. See PersistentStorage for more details + // about the concept of working_path. // options: Options instance. // // Returns: // INVALID_ARGUMENT_ERROR if any value in options is invalid. // FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored - // checksum. + // checksum or any other inconsistency. // INTERNAL_ERROR on I/O errors. // Any FileBackedVector errors. static libtextclassifier3::StatusOr> - Create(const Filesystem& filesystem, std::string_view base_dir, - const Options& options); + Create(const Filesystem& filesystem, std::string working_path, + Options options); - ~PersistentHashMap(); + // Deletes PersistentHashMap under working_path.
+ // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + static libtextclassifier3::Status Discard(const Filesystem& filesystem, + std::string working_path) { + return PersistentStorage::Discard(filesystem, working_path, + kWorkingPathType); + } + + ~PersistentHashMap() override; // Update a key value pair. If key does not exist, then insert (key, value) // into the storage. Otherwise overwrite the value into the storage. @@ -349,13 +338,6 @@ class PersistentHashMap { Iterator GetIterator() const { return Iterator(this); } - // Flushes content to underlying files. - // - // Returns: - // OK on success - // INTERNAL_ERROR on I/O error - libtextclassifier3::Status PersistToDisk(); - // Calculates and returns the disk usage (metadata + 3 storages total file // size) in bytes. // @@ -374,16 +356,8 @@ class PersistentHashMap { // INTERNAL_ERROR on I/O error libtextclassifier3::StatusOr GetElementsSize() const; - // Updates all checksums of the persistent hash map components and returns - // all_crc. 
- // - // Returns: - // Crc of all components (all_crc) on success - // INTERNAL_ERROR if any data inconsistency - libtextclassifier3::StatusOr ComputeChecksum(); - int32_t size() const { - return entry_storage_->num_elements() - info()->num_deleted_entries; + return entry_storage_->num_elements() - info().num_deleted_entries; } bool empty() const { return size() == 0; } @@ -402,14 +376,14 @@ class PersistentHashMap { }; explicit PersistentHashMap( - const Filesystem& filesystem, std::string_view base_dir, - const Options& options, MemoryMappedFile&& metadata_mmapped_file, + const Filesystem& filesystem, std::string&& working_path, + Options&& options, MemoryMappedFile&& metadata_mmapped_file, std::unique_ptr> bucket_storage, std::unique_ptr> entry_storage, std::unique_ptr> kv_storage) - : filesystem_(&filesystem), - base_dir_(base_dir), - options_(options), + : PersistentStorage(filesystem, std::move(working_path), + kWorkingPathType), + options_(std::move(options)), metadata_mmapped_file_(std::make_unique( std::move(metadata_mmapped_file))), bucket_storage_(std::move(bucket_storage)), @@ -417,12 +391,40 @@ class PersistentHashMap { kv_storage_(std::move(kv_storage)) {} static libtextclassifier3::StatusOr> - InitializeNewFiles(const Filesystem& filesystem, std::string_view base_dir, - const Options& options); + InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path, + Options&& options); static libtextclassifier3::StatusOr> InitializeExistingFiles(const Filesystem& filesystem, - std::string_view base_dir, const Options& options); + std::string&& working_path, Options&& options); + + // Flushes contents of all storages to underlying files. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status PersistStoragesToDisk() override; + + // Flushes contents of metadata file. 
+ // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status PersistMetadataToDisk() override; + + // Computes and returns Info checksum. + // + // Returns: + // - Crc of the Info on success + libtextclassifier3::StatusOr ComputeInfoChecksum() override; + + // Computes and returns all storages checksum. Checksums of bucket_storage_, + // entry_storage_ and kv_storage_ will be combined together by XOR. + // + // Returns: + // - Crc of all storages on success + // - INTERNAL_ERROR if any data inconsistency + libtextclassifier3::StatusOr ComputeStoragesChecksum() override; // Find the index of the target entry (that contains the key) from a bucket // (specified by bucket index). Also return the previous entry index, since @@ -468,23 +470,25 @@ class PersistentHashMap { // Any FileBackedVector errors libtextclassifier3::Status RehashIfNecessary(bool force_rehash); - Crcs* crcs() { - return reinterpret_cast(metadata_mmapped_file_->mutable_region() + - Crcs::kFileOffset); + Crcs& crcs() override { + return *reinterpret_cast(metadata_mmapped_file_->mutable_region() + + kCrcsMetadataFileOffset); } - Info* info() { - return reinterpret_cast(metadata_mmapped_file_->mutable_region() + - Info::kFileOffset); + const Crcs& crcs() const override { + return *reinterpret_cast(metadata_mmapped_file_->region() + + kCrcsMetadataFileOffset); } - const Info* info() const { - return reinterpret_cast(metadata_mmapped_file_->region() + - Info::kFileOffset); + Info& info() { + return *reinterpret_cast(metadata_mmapped_file_->mutable_region() + + kInfoMetadataFileOffset); } - const Filesystem* filesystem_; - std::string base_dir_; + const Info& info() const { + return *reinterpret_cast(metadata_mmapped_file_->region() + + kInfoMetadataFileOffset); + } Options options_; diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc index 8fde4a8..6e9a41b 100644 --- a/icing/file/persistent-hash-map_test.cc +++ 
b/icing/file/persistent-hash-map_test.cc @@ -24,7 +24,9 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" +#include "icing/file/persistent-storage.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" #include "icing/util/crc32.h" @@ -34,6 +36,7 @@ using ::testing::Eq; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsEmpty; +using ::testing::IsTrue; using ::testing::Key; using ::testing::Lt; using ::testing::Not; @@ -48,7 +51,7 @@ namespace lib { namespace { using Bucket = PersistentHashMap::Bucket; -using Crcs = PersistentHashMap::Crcs; +using Crcs = PersistentStorage::Crcs; using Entry = PersistentHashMap::Entry; using Info = PersistentHashMap::Info; using Options = PersistentHashMap::Options; @@ -59,7 +62,11 @@ static constexpr int32_t kTestInitNumBuckets = 1; class PersistentHashMapTest : public ::testing::Test { protected: void SetUp() override { - base_dir_ = GetTestTempDir() + "/persistent_hash_map_test"; + base_dir_ = GetTestTempDir() + "/icing"; + ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), + IsTrue()); + + working_path_ = base_dir_ + "/persistent_hash_map_test"; } void TearDown() override { @@ -93,6 +100,7 @@ class PersistentHashMapTest : public ::testing::Test { Filesystem filesystem_; std::string base_dir_; + std::string working_path_; }; TEST_F(PersistentHashMapTest, OptionsInvalidValueTypeSize) { @@ -185,11 +193,11 @@ TEST_F(PersistentHashMapTest, EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, InvalidBaseDir) { - EXPECT_THAT( - PersistentHashMap::Create(filesystem_, "/dev/null", - Options(/*value_type_size_in=*/sizeof(int))), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); +TEST_F(PersistentHashMapTest, InvalidWorkingPath) { + EXPECT_THAT(PersistentHashMap::Create( + filesystem_, 
"/dev/null/persistent_hash_map_test", + Options(/*value_type_size_in=*/sizeof(int))), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) { @@ -197,16 +205,16 @@ TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) { ASSERT_FALSE(invalid_options.IsValid()); EXPECT_THAT( - PersistentHashMap::Create(filesystem_, base_dir_, invalid_options), + PersistentHashMap::Create(filesystem_, working_path_, invalid_options), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } TEST_F(PersistentHashMapTest, InitializeNewFiles) { { - ASSERT_FALSE(filesystem_.DirectoryExists(base_dir_.c_str())); + ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, + PersistentHashMap::Create(filesystem_, working_path_, Options(/*value_type_size_in=*/sizeof(int)))); EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty())); @@ -215,17 +223,16 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) { // Metadata file should be initialized correctly for both info and crcs // sections. 
- const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".m"); ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); ASSERT_TRUE(metadata_sfd.is_valid()); // Check info section Info info; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info), - Info::kFileOffset)); - EXPECT_THAT(info.version, Eq(PersistentHashMap::kVersion)); + PersistentHashMap::kInfoMetadataFileOffset)); + EXPECT_THAT(info.magic, Eq(Info::kMagic)); EXPECT_THAT(info.value_type_size, Eq(sizeof(int))); EXPECT_THAT(info.max_load_factor_percent, Eq(Options::kDefaultMaxLoadFactorPercent)); @@ -235,13 +242,10 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) { // Check crcs section Crcs crcs; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + PersistentHashMap::kCrcsMetadataFileOffset)); // # of elements in bucket_storage should be 1, so it should have non-zero - // crc value. - EXPECT_THAT(crcs.component_crcs.bucket_storage_crc, Not(Eq(0))); - // Other empty file backed vectors should have 0 crc value. - EXPECT_THAT(crcs.component_crcs.entry_storage_crc, Eq(0)); - EXPECT_THAT(crcs.component_crcs.kv_storage_crc, Eq(0)); + // all storages crc value. 
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Not(Eq(0))); EXPECT_THAT(crcs.component_crcs.info_crc, Eq(Crc32(std::string_view(reinterpret_cast(&info), sizeof(Info))) @@ -260,7 +264,7 @@ TEST_F(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -279,7 +283,7 @@ TEST_F(PersistentHashMapTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/1, @@ -301,7 +305,7 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(original_init_num_buckets)); @@ -314,7 +318,7 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); // # of buckets should still be the original value. EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(original_init_num_buckets)); @@ -327,7 +331,7 @@ TEST_F(PersistentHashMapTest, // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); // Put some key value pairs. 
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); @@ -343,7 +347,7 @@ TEST_F(PersistentHashMapTest, // Without calling PersistToDisk, checksums will not be recomputed or synced // to disk, so initializing another instance on the same files should fail. - EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options), + EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } @@ -353,7 +357,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map1, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); // Put some key value pairs. ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data())); @@ -374,7 +378,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map2, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); EXPECT_THAT(persistent_hash_map2, Pointee(SizeIs(2))); EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "a"), IsOkAndHolds(1)); EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "b"), IsOkAndHolds(2)); @@ -387,7 +391,7 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data())); @@ -407,20 +411,74 @@ 
TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) { // we should be able to get the same contents. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2))); EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2)); } } +TEST_F(PersistentHashMapTest, + InitializeExistingFilesWithDifferentMagicShouldFail) { + Options options(/*value_type_size_in=*/sizeof(int)); + + { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr persistent_hash_map, + PersistentHashMap::Create(filesystem_, working_path_, options)); + ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); + + ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); + } + + { + // Manually change kMagic and update checksum + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".m"); + ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); + ASSERT_TRUE(metadata_sfd.is_valid()); + + Crcs crcs; + ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), + PersistentHashMap::kCrcsMetadataFileOffset)); + + Info info; + ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info), + PersistentHashMap::kInfoMetadataFileOffset)); + + // Manually change magic and update checksums. 
+ info.magic += kCorruptedValueOffset; + crcs.component_crcs.info_crc = info.ComputeChecksum().Get(); + crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get(); + ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), + PersistentHashMap::kCrcsMetadataFileOffset, + &crcs, sizeof(Crcs))); + ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), + PersistentHashMap::kInfoMetadataFileOffset, + &info, sizeof(Info))); + } + + { + // Attempt to create the persistent hash map with different magic. This + // should fail. + libtextclassifier3::StatusOr> + persistent_hash_map_or = + PersistentHashMap::Create(filesystem_, working_path_, options); + EXPECT_THAT(persistent_hash_map_or, + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(persistent_hash_map_or.status().error_message(), + HasSubstr("PersistentHashMap header magic mismatch")); + } +} + TEST_F(PersistentHashMapTest, InitializeExistingFilesWithDifferentValueTypeSizeShouldFail) { { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, + PersistentHashMap::Create(filesystem_, working_path_, Options(/*value_type_size_in=*/sizeof(int)))); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); @@ -433,7 +491,7 @@ TEST_F(PersistentHashMapTest, ASSERT_THAT(sizeof(char), Not(Eq(sizeof(int)))); libtextclassifier3::StatusOr> persistent_hash_map_or = PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(/*value_type_size_in=*/sizeof(char))); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); @@ -449,7 +507,7 @@ TEST_F(PersistentHashMapTest, // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", 
Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); @@ -461,7 +519,7 @@ TEST_F(PersistentHashMapTest, options.max_num_entries = 1; ASSERT_TRUE(options.IsValid()); - EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options), + EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } @@ -478,7 +536,7 @@ TEST_F(PersistentHashMapTest, options.max_num_entries = 1; ASSERT_TRUE(options.IsValid()); - EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_, options), + EXPECT_THAT(PersistentHashMap::Create(filesystem_, working_path_, options), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } } @@ -490,26 +548,26 @@ TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".m"); ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); ASSERT_TRUE(metadata_sfd.is_valid()); Crcs crcs; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + PersistentHashMap::kCrcsMetadataFileOffset)); // Manually corrupt all_crc crcs.all_crc += kCorruptedValueOffset; - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); + ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), + 
PersistentHashMap::kCrcsMetadataFileOffset, + &crcs, sizeof(Crcs))); metadata_sfd.reset(); { @@ -517,11 +575,11 @@ TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) { // corrupted all_crc. This should fail. libtextclassifier3::StatusOr> persistent_hash_map_or = - PersistentHashMap::Create(filesystem_, base_dir_, options); + PersistentHashMap::Create(filesystem_, working_path_, options); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(persistent_hash_map_or.status().error_message(), - HasSubstr("Invalid all crc for PersistentHashMap")); + HasSubstr("Invalid all crc")); } } @@ -533,173 +591,169 @@ TEST_F(PersistentHashMapTest, // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".m"); ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); ASSERT_TRUE(metadata_sfd.is_valid()); Info info; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info), - Info::kFileOffset)); + PersistentHashMap::kInfoMetadataFileOffset)); // Modify info, but don't update the checksum. This would be similar to // corruption of info. 
info.num_deleted_entries += kCorruptedValueOffset; - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Info::kFileOffset, &info, - sizeof(Info))); + ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), + PersistentHashMap::kInfoMetadataFileOffset, + &info, sizeof(Info))); { // Attempt to create the persistent hash map with info that doesn't match // its checksum and confirm that it fails. libtextclassifier3::StatusOr> persistent_hash_map_or = - PersistentHashMap::Create(filesystem_, base_dir_, options); + PersistentHashMap::Create(filesystem_, working_path_, options); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(persistent_hash_map_or.status().error_message(), - HasSubstr("Invalid info crc for PersistentHashMap")); + HasSubstr("Invalid info crc")); } } TEST_F(PersistentHashMapTest, - InitializeExistingFilesWithWrongBucketStorageCrc) { + InitializeExistingFilesWithCorruptedBucketStorage) { Options options(/*value_type_size_in=*/sizeof(int)); { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); - ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); - ASSERT_TRUE(metadata_sfd.is_valid()); - - Crcs crcs; - ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + { + // Update bucket storage manually. 
+ const std::string bucket_storage_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".b"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> bucket_storage, + FileBackedVector::Create( + filesystem_, bucket_storage_file_path, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, + bucket_storage->ComputeChecksum()); + ICING_ASSERT_OK(bucket_storage->Append(Bucket())); + ICING_ASSERT_OK(bucket_storage->PersistToDisk()); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, + bucket_storage->ComputeChecksum()); + ASSERT_THAT(old_crc, Not(Eq(new_crc))); + } - // Manually corrupt bucket_storage_crc - crcs.component_crcs.bucket_storage_crc += kCorruptedValueOffset; - crcs.all_crc = Crc32(std::string_view( - reinterpret_cast(&crcs.component_crcs), - sizeof(Crcs::ComponentCrcs))) - .Get(); - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); { // Attempt to create the persistent hash map with metadata containing // corrupted bucket_storage_crc. This should fail. 
libtextclassifier3::StatusOr> persistent_hash_map_or = - PersistentHashMap::Create(filesystem_, base_dir_, options); + PersistentHashMap::Create(filesystem_, working_path_, options); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - persistent_hash_map_or.status().error_message(), - HasSubstr("Mismatch crc with PersistentHashMap bucket storage")); + EXPECT_THAT(persistent_hash_map_or.status().error_message(), + HasSubstr("Invalid storages crc")); } } -TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongEntryStorageCrc) { +TEST_F(PersistentHashMapTest, + InitializeExistingFilesWithCorruptedEntryStorage) { Options options(/*value_type_size_in=*/sizeof(int)); { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); - ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); - ASSERT_TRUE(metadata_sfd.is_valid()); - - Crcs crcs; - ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + { + // Update entry storage manually. 
+ const std::string entry_storage_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".e"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> entry_storage, + FileBackedVector::Create( + filesystem_, entry_storage_file_path, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, entry_storage->ComputeChecksum()); + ICING_ASSERT_OK(entry_storage->Append( + Entry(/*key_value_index=*/-1, /*next_entry_index=*/-1))); + ICING_ASSERT_OK(entry_storage->PersistToDisk()); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, entry_storage->ComputeChecksum()); + ASSERT_THAT(old_crc, Not(Eq(new_crc))); + } - // Manually corrupt entry_storage_crc - crcs.component_crcs.entry_storage_crc += kCorruptedValueOffset; - crcs.all_crc = Crc32(std::string_view( - reinterpret_cast(&crcs.component_crcs), - sizeof(Crcs::ComponentCrcs))) - .Get(); - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); { // Attempt to create the persistent hash map with metadata containing // corrupted entry_storage_crc. This should fail. 
libtextclassifier3::StatusOr> persistent_hash_map_or = - PersistentHashMap::Create(filesystem_, base_dir_, options); + PersistentHashMap::Create(filesystem_, working_path_, options); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(persistent_hash_map_or.status().error_message(), - HasSubstr("Mismatch crc with PersistentHashMap entry storage")); + HasSubstr("Invalid storages crc")); } } TEST_F(PersistentHashMapTest, - InitializeExistingFilesWithWrongKeyValueStorageCrc) { + InitializeExistingFilesWithCorruptedKeyValueStorage) { Options options(/*value_type_size_in=*/sizeof(int)); { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); - ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); - ASSERT_TRUE(metadata_sfd.is_valid()); - - Crcs crcs; - ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + { + // Update kv storage manually. 
+ const std::string kv_storage_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".k"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> kv_storage, + FileBackedVector::Create( + filesystem_, kv_storage_file_path, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, kv_storage->ComputeChecksum()); + ICING_ASSERT_OK(kv_storage->Append('z')); + ICING_ASSERT_OK(kv_storage->PersistToDisk()); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, kv_storage->ComputeChecksum()); + ASSERT_THAT(old_crc, Not(Eq(new_crc))); + } - // Manually corrupt kv_storage_crc - crcs.component_crcs.kv_storage_crc += kCorruptedValueOffset; - crcs.all_crc = Crc32(std::string_view( - reinterpret_cast(&crcs.component_crcs), - sizeof(Crcs::ComponentCrcs))) - .Get(); - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); { // Attempt to create the persistent hash map with metadata containing // corrupted kv_storage_crc. This should fail. 
libtextclassifier3::StatusOr> persistent_hash_map_or = - PersistentHashMap::Create(filesystem_, base_dir_, options); + PersistentHashMap::Create(filesystem_, working_path_, options); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - persistent_hash_map_or.status().error_message(), - HasSubstr("Mismatch crc with PersistentHashMap key value storage")); + EXPECT_THAT(persistent_hash_map_or.status().error_message(), + HasSubstr("Invalid storages crc")); } } @@ -716,7 +770,7 @@ TEST_F(PersistentHashMapTest, // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); @@ -739,7 +793,7 @@ TEST_F(PersistentHashMapTest, // Also verify all entries should remain unchanged. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2))); EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); @@ -748,15 +802,14 @@ TEST_F(PersistentHashMapTest, ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", PersistentHashMap::kSubDirectory, "/", - PersistentHashMap::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", PersistentHashMap::kFilePrefix, ".m"); ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); ASSERT_TRUE(metadata_sfd.is_valid()); Info info; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info), - Info::kFileOffset)); + PersistentHashMap::kInfoMetadataFileOffset)); EXPECT_THAT(info.max_load_factor_percent, Eq(options.max_load_factor_percent)); @@ -765,7 +818,7 @@ TEST_F(PersistentHashMapTest, { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } @@ -786,7 +839,7 @@ TEST_F(PersistentHashMapTest, // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data())); @@ -816,7 +869,7 @@ TEST_F(PersistentHashMapTest, // should remain the same. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(prev_num_buckets)); @@ -834,7 +887,7 @@ TEST_F(PersistentHashMapTest, // exceeds the limit. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, options)); + PersistentHashMap::Create(filesystem_, working_path_, options)); // After changing max_load_factor_percent, there should be rehashing and the // new loading should not be greater than the new max load factor. @@ -856,7 +909,7 @@ TEST_F(PersistentHashMapTest, PutAndGet) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -892,7 +945,7 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -925,7 +978,7 @@ TEST_F(PersistentHashMapTest, ShouldRehash) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -961,7 +1014,7 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -987,7 +1040,7 @@ TEST_F(PersistentHashMapTest, 
GetOrPutShouldGetIfKeyExists) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1016,7 +1069,7 @@ TEST_F(PersistentHashMapTest, Delete) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1067,7 +1120,7 @@ TEST_F(PersistentHashMapTest, DeleteMultiple) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1125,7 +1178,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1163,7 +1216,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1200,7 +1253,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1238,7 +1291,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, 
PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1262,7 +1315,7 @@ TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/1, @@ -1297,7 +1350,7 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, - PersistentHashMap::Create(filesystem_, base_dir_, + PersistentHashMap::Create(filesystem_, working_path_, Options(/*value_type_size_in=*/sizeof(int)))); const char invalid_key[] = "a\0bc"; @@ -1319,7 +1372,7 @@ TEST_F(PersistentHashMapTest, EmptyHashMapIterator) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1336,7 +1389,7 @@ TEST_F(PersistentHashMapTest, Iterator) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1363,7 +1416,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1392,7 +1445,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, 
PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1421,7 +1474,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, @@ -1450,7 +1503,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, diff --git a/icing/file/persistent-storage.cc b/icing/file/persistent-storage.cc new file mode 100644 index 0000000..9a595ef --- /dev/null +++ b/icing/file/persistent-storage.cc @@ -0,0 +1,55 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/file/persistent-storage.h" + +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/legacy/core/icing-string-util.h" + +namespace icing { +namespace lib { + +/* static */ libtextclassifier3::Status PersistentStorage::Discard( + const Filesystem& filesystem, const std::string& working_path, + WorkingPathType working_path_type) { + switch (working_path_type) { + case WorkingPathType::kSingleFile: { + if (!filesystem.DeleteFile(working_path.c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to delete PersistentStorage file: ", working_path)); + } + return libtextclassifier3::Status::OK; + } + case WorkingPathType::kDirectory: { + if (!filesystem.DeleteDirectoryRecursively(working_path.c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to delete PersistentStorage directory: ", working_path)); + } + return libtextclassifier3::Status::OK; + } + case WorkingPathType::kDummy: + return libtextclassifier3::Status::OK; + } + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Unknown working path type %d for PersistentStorage %s", + static_cast(working_path_type), working_path.c_str())); +} + +} // namespace lib +} // namespace icing diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h new file mode 100644 index 0000000..a70c9e9 --- /dev/null +++ b/icing/file/persistent-storage.h @@ -0,0 +1,338 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_FILE_PERSISTENT_STORAGE_H_ +#define ICING_FILE_PERSISTENT_STORAGE_H_ + +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/util/crc32.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +// PersistentStorage: an abstract class for all persistent data structures. +// - It provides some common persistent file methods, e.g. PersistToDisk. +// - It encapsulates most of the checksum handling logics (including update and +// validation). +// +// Terminology: +// - Crcs: checksum section +// - Info: (custom) information for derived class +// - Metadata: Crcs + Info +// +// Usually a persistent data structure will have its own custom Info and +// storages (single or composite storages) definition. To create a new +// persistent data structure via PersistentStorage: +// - Decide what type the working path is (single file or directory). See +// working_path_ and WorkingPathType for more details. +// - Create a new class that inherits PersistentStorage: +// - Declare custom Info and design the metadata section layout. +// Usually the layout is , and there are 2 common ways to +// manage metadata section: +// - Have a separate file for metadata. 
In this case, the new persistent +// data structure contains multiple files, so working path should be used +// as directory path and multiple files will be stored under it. Example: +// PersistentHashMap. +// - Have a single file for both metadata and storage data. In this case, +// the file layout should be , and +// working path should be used as file path. Example: FileBackedVector. +// - Handle working path file/directory creation and deletion. +// PersistentStorage only provides static Discard() method to use. The +// derived class should implement other logics, e.g. working path (file +// /directory) creation, check condition to discard working path and start +// over new file(s). +// - Implement all pure virtual methods: +// - PersistStoragesToDisk: persist all (composite) storages. In general, +// the implementation will be calling PersistToDisk for all composite +// storages. +// - PersistMetadataToDisk: persist metadata, including Crcs and Info. +// - If the derived class maintains a concrete Crc and (custom) Info +// instance, then it should perform write/pwrite into the metadata +// section. +// - If the derived class uses memory-mapped region directly for metadata, +// then it should call MemoryMappedFile::PersistToDisk. +// - See crcs() for more details. +// - ComputeInfoChecksum: compute the checksum for custom Info. +// - ComputeStoragesChecksum: compute the (combined) checksum for all +// (composite) storages. In general, the implementation will be calling +// UpdateChecksums for all composite storages and XOR all checksums. +// - crcs(): provide the reference for PersistentStorage to write checksums. +// The derived class can either maintain a concrete Crcs instance, or +// reinterpret_cast the memory-mapped region to Crcs reference. Either +// choice is fine as long as PersistMetadataToDisk flushes it to disk +// correctly. 
+// - Call either InitializeNewStorage or InitializeExistingStorage when creating +// and initializing an instance, depending on initializing new storage or from +// existing file(s). +class PersistentStorage { + public: + enum class WorkingPathType { + kSingleFile, + kDirectory, + kDummy, + }; + + // Crcs and Info will be written into the metadata section. Info is defined by + // the actual implementation of each persistent storage. Usually the Metadata + // layout is: + struct Crcs { + struct ComponentCrcs { + uint32_t info_crc; + uint32_t storages_crc; + + bool operator==(const ComponentCrcs& other) const { + return info_crc == other.info_crc && storages_crc == other.storages_crc; + } + + Crc32 ComputeChecksum() const { + return Crc32(std::string_view(reinterpret_cast(this), + sizeof(ComponentCrcs))); + } + } __attribute__((packed)); + + bool operator==(const Crcs& other) const { + return all_crc == other.all_crc && component_crcs == other.component_crcs; + } + + uint32_t all_crc; + ComponentCrcs component_crcs; + } __attribute__((packed)); + static_assert(sizeof(Crcs) == 12, ""); + + // Deletes working_path according to its type. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + // - INVALID_ARGUMENT_ERROR if working_path_type is unknown type + static libtextclassifier3::Status Discard(const Filesystem& filesystem, + const std::string& working_path, + WorkingPathType working_path_type); + + virtual ~PersistentStorage() = default; + + // Initializes new persistent storage. It computes the initial checksums and + // writes into the metadata file. + // + // Note: either InitializeNewStorage or InitializeExistingStorage should be + // invoked after creating a PersistentStorage instance before using, otherwise + // an uninitialized instance will fail to use persistent storage features, + // e.g. PersistToDisk, UpdateChecksums. 
+ // + // Returns: + // - OK on success or already initialized + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::Status InitializeNewStorage() { + if (is_initialized_) { + return libtextclassifier3::Status::OK; + } + + ICING_RETURN_IF_ERROR(UpdateChecksumsInternal()); + ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); + + is_initialized_ = true; + return libtextclassifier3::Status::OK; + } + + // Initializes persistent storage from existing file(s). + // + // It enforces the following check(s): + // - Validate checksums. + // + // Note: either InitializeNewStorage or InitializeExistingStorage should be + // invoked after creating a PersistentStorage instance before using. + // + // Returns: + // - OK on success or already initialized + // - FAILED_PRECONDITION_ERROR if checksum validation fails. + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::Status InitializeExistingStorage() { + if (is_initialized_) { + return libtextclassifier3::Status::OK; + } + + ICING_RETURN_IF_ERROR(ValidateChecksums()); + + is_initialized_ = true; + return libtextclassifier3::Status::OK; + } + + // Flushes contents to underlying files. + // 1) Flushes storages. + // 2) Updates all checksums by new data. + // 3) Flushes metadata. 
+ // + // Returns: + // - OK on success + // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized + // - Any errors from PersistStoragesToDisk, UpdateChecksums, + // PersistMetadataToDisk, depending on actual implementation + libtextclassifier3::Status PersistToDisk() { + if (!is_initialized_) { + return absl_ports::FailedPreconditionError(absl_ports::StrCat( + "PersistentStorage ", working_path_, " not initialized")); + } + + ICING_RETURN_IF_ERROR(PersistStoragesToDisk()); + ICING_RETURN_IF_ERROR(UpdateChecksums()); + ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); + return libtextclassifier3::Status::OK; + } + + // Updates checksums of all components and returns the overall crc (all_crc) + // of the persistent storage. + // + // Returns: + // - Overall crc of the persistent storage on success + // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::StatusOr UpdateChecksums() { + if (!is_initialized_) { + return absl_ports::FailedPreconditionError(absl_ports::StrCat( + "PersistentStorage ", working_path_, " not initialized")); + } + + return UpdateChecksumsInternal(); + } + + protected: + explicit PersistentStorage(const Filesystem& filesystem, + std::string working_path, + WorkingPathType working_path_type) + : filesystem_(filesystem), + working_path_(std::move(working_path)), + working_path_type_(working_path_type), + is_initialized_(false) {} + + // Flushes contents of metadata. The implementation should flush Crcs and Info + // correctly, depending on whether they're using memory-mapped regions or + // concrete instances in the derived class. + // + // Returns: + // - OK on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::Status PersistMetadataToDisk() = 0; + + // Flushes contents of all storages to underlying files. 
+ // + // Returns: + // - OK on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::Status PersistStoragesToDisk() = 0; + + // Computes and returns Info checksum. + // + // This function will be mainly called by UpdateChecksums. + // + // Returns: + // - Crc of the Info on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::StatusOr ComputeInfoChecksum() = 0; + + // Computes and returns all storages checksum. If there are multiple storages, + // usually we XOR their checksums together to a single checksum. + // + // This function will be mainly called by UpdateChecksums. + // + // Returns: + // - Crc of all storages on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::StatusOr ComputeStoragesChecksum() = 0; + + // Returns the Crcs instance reference. The derived class can either own a + // concrete Crcs instance, or reinterpret_cast the memory-mapped region to + // Crcs reference. PersistMetadataToDisk should flush it to disk correctly. + virtual Crcs& crcs() = 0; + virtual const Crcs& crcs() const = 0; + + const Filesystem& filesystem_; + // Path to the storage. It can be a single file path or a directory path + // depending on the implementation of the derived class. + // + // Note that the derived storage class will take full ownership of + // working_path_, including creation/deletion. It is the caller's + // responsibility to specify correct working path and avoid mixing different + // persistent storages together under the same path. Also the caller has the + // ownership for the parent directory of working_path_, and it is responsible + // for parent directory creation/deletion. + std::string working_path_; + WorkingPathType working_path_type_; + + bool is_initialized_; + + private: + // Updates checksums of all components and returns the overall crc (all_crc) + // of the persistent storage. 
Different from UpdateChecksums, it won't check + // if PersistentStorage is initialized or not. + // + // Returns: + // - Overall crc of the persistent storage on success + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::StatusOr UpdateChecksumsInternal() { + Crcs& crcs_ref = crcs(); + // Compute and update storages + info checksums. + ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum()); + ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum()); + crcs_ref.component_crcs.info_crc = info_crc.Get(); + crcs_ref.component_crcs.storages_crc = storages_crc.Get(); + + // Finally compute and update overall checksum. + crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get(); + return Crc32(crcs_ref.all_crc); + } + + // Validates all checksums of the persistent storage. + // + // Returns: + // - OK on success + // - FAILED_PRECONDITION_ERROR if any checksum is incorrect. + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::Status ValidateChecksums() { + const Crcs& crcs_ref = crcs(); + if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) { + return absl_ports::FailedPreconditionError("Invalid all crc"); + } + + ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum()); + if (crcs_ref.component_crcs.info_crc != info_crc.Get()) { + return absl_ports::FailedPreconditionError("Invalid info crc"); + } + + ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum()); + if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) { + return absl_ports::FailedPreconditionError("Invalid storages crc"); + } + + return libtextclassifier3::Status::OK; + } +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_FILE_PERSISTENT_STORAGE_H_ diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 
e48e6e0..48e3501 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -64,7 +64,6 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" -#include #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/file/filesystem.h" @@ -79,6 +78,7 @@ #include "icing/util/data-loss.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" +#include namespace icing { namespace lib { @@ -971,8 +971,7 @@ PortableFileBackedProtoLog::ReadProto(int64_t file_offset) const { return absl_ports::NotFoundError("The proto data has been erased."); } - google::protobuf::io::ArrayInputStream proto_stream(buf.get(), - stored_size); + google::protobuf::io::ArrayInputStream proto_stream(buf.get(), stored_size); // Deserialize proto ProtoT proto; diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 20759f8..dd43204 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -93,6 +93,7 @@ namespace { constexpr std::string_view kDocumentSubfolderName = "document_dir"; constexpr std::string_view kIndexSubfolderName = "index_dir"; +constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir"; constexpr std::string_view kSchemaSubfolderName = "schema_dir"; constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker"; constexpr std::string_view kInitMarkerFilename = "init_marker"; @@ -343,6 +344,14 @@ std::string MakeIndexDirectoryPath(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kIndexSubfolderName); } +// Working path for integer index. Integer index is derived from +// PersistentStorage and it will take full ownership of this working path, +// including creation/deletion. See PersistentStorage for more details about +// working path. 
+std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName); +} + // SchemaStore files are in a standalone subfolder for easier file management. // We can delete and recreate the subfolder and not touch/affect anything // else. @@ -655,7 +664,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( // TODO(b/249829533): switch to use persistent numeric index after // implementing and initialize numeric index. - integer_index_ = std::make_unique>(); + TC3_ASSIGN_OR_RETURN( + integer_index_, + DummyNumericIndex::Create( + *filesystem_, MakeIntegerIndexWorkingPath(options_.base_dir()))); libtextclassifier3::Status index_init_status; if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) { @@ -1738,8 +1750,9 @@ SearchResultProto IcingSearchEngine::Search( ScopedTimer overall_timer(clock_->GetNewTimer(), [query_stats](int64_t t) { query_stats->set_latency_ms(t); }); - // TODO(b/146008613) Explore ideas to make this function read-only. - absl_ports::unique_lock l(&mutex_); + // Only an overall read-lock is required here. A finer-grained write-lock is + // provided around the LiteIndex. 
+ absl_ports::shared_lock l(&mutex_); query_stats->set_lock_acquisition_latency_ms( overall_timer.timer().GetElapsedMilliseconds()); if (!initialized_) { @@ -1768,9 +1781,40 @@ SearchResultProto IcingSearchEngine::Search( query_stats->set_is_first_page(true); query_stats->set_requested_page_size(result_spec.num_per_page()); - // Process query and score - QueryScoringResults query_scoring_results = - ProcessQueryAndScore(search_spec, scoring_spec, result_spec); + const JoinSpecProto& join_spec = search_spec.join_spec(); + std::unique_ptr join_children_fetcher; + if (!join_spec.parent_property_expression().empty() && + !join_spec.child_property_expression().empty()) { + // Process child query + QueryScoringResults nested_query_scoring_results = + ProcessQueryAndScore(join_spec.nested_spec().search_spec(), + join_spec.nested_spec().scoring_spec(), + join_spec.nested_spec().result_spec(), + /*join_children_fetcher=*/nullptr); + // TODO(b/256022027): set different kinds of latency for 2nd query. + if (!nested_query_scoring_results.status.ok()) { + TransformStatus(nested_query_scoring_results.status, result_status); + return result_proto; + } + + JoinProcessor join_processor(document_store_.get()); + // Building a JoinChildrenFetcher where child documents are grouped by + // their joinable values.
+ libtextclassifier3::StatusOr join_children_fetcher_or = + join_processor.GetChildrenFetcher( + search_spec.join_spec(), + std::move(nested_query_scoring_results.scored_document_hits)); + if (!join_children_fetcher_or.ok()) { + TransformStatus(join_children_fetcher_or.status(), result_status); + return result_proto; + } + join_children_fetcher = std::make_unique( + std::move(join_children_fetcher_or).ValueOrDie()); + } + + // Process parent query + QueryScoringResults query_scoring_results = ProcessQueryAndScore( + search_spec, scoring_spec, result_spec, join_children_fetcher.get()); int term_count = 0; for (const auto& section_and_terms : query_scoring_results.query_terms) { term_count += section_and_terms.second.size(); @@ -1793,26 +1837,13 @@ SearchResultProto IcingSearchEngine::Search( } std::unique_ptr ranker; - const JoinSpecProto& join_spec = search_spec.join_spec(); - if (!join_spec.parent_property_expression().empty() && - !join_spec.child_property_expression().empty()) { - // Process 2nd query - QueryScoringResults nested_query_scoring_results = - ProcessQueryAndScore(join_spec.nested_spec().search_spec(), - join_spec.nested_spec().scoring_spec(), - join_spec.nested_spec().result_spec()); - // TOOD(b/256022027): set different kinds of latency for 2nd query. 
- if (!nested_query_scoring_results.status.ok()) { - TransformStatus(nested_query_scoring_results.status, result_status); - return result_proto; - } - + if (join_children_fetcher != nullptr) { // Join 2 scored document hits JoinProcessor join_processor(document_store_.get()); libtextclassifier3::StatusOr> joined_result_document_hits_or = join_processor.Join( join_spec, std::move(query_scoring_results.scored_document_hits), - std::move(nested_query_scoring_results.scored_document_hits)); + *join_children_fetcher); if (!joined_result_document_hits_or.ok()) { TransformStatus(joined_result_document_hits_or.status(), result_status); return result_proto; @@ -1896,7 +1927,8 @@ SearchResultProto IcingSearchEngine::Search( IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec) { + const ResultSpecProto& result_spec, + const JoinChildrenFetcher* join_children_fetcher) { std::unique_ptr component_timer = clock_->GetNewTimer(); // Gets unordered results from query processor @@ -1934,8 +1966,9 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( component_timer = clock_->GetNewTimer(); // Scores but does not rank the results. 
libtextclassifier3::StatusOr> - scoring_processor_or = ScoringProcessor::Create( - scoring_spec, document_store_.get(), schema_store_.get()); + scoring_processor_or = + ScoringProcessor::Create(scoring_spec, document_store_.get(), + schema_store_.get(), join_children_fetcher); if (!scoring_processor_or.ok()) { return QueryScoringResults(std::move(scoring_processor_or).status(), std::move(query_results.query_terms), diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index 221d86c..446e081 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -29,6 +29,7 @@ #include "icing/index/index.h" #include "icing/index/numeric/numeric-index.h" #include "icing/jni/jni-cache.h" +#include "icing/join/join-children-fetcher.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/performance-configuration.h" #include "icing/proto/debug.pb.h" @@ -587,10 +588,11 @@ class IcingSearchEngine { parse_query_latency_ms(parse_query_latency_ms_in), scoring_latency_ms(scoring_latency_ms_in) {} }; - QueryScoringResults ProcessQueryAndScore(const SearchSpecProto& search_spec, - const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec) - ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + QueryScoringResults ProcessQueryAndScore( + const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, + const ResultSpecProto& result_spec, + const JoinChildrenFetcher* join_children_fetcher) + ICING_SHARED_LOCKS_REQUIRED(mutex_); // Many of the internal components rely on other components' derived data. 
// Check that everything is consistent with each other so that we're not diff --git a/icing/icing-search-engine_delete_test.cc b/icing/icing-search-engine_delete_test.cc new file mode 100644 index 0000000..c3b1ccd --- /dev/null +++ b/icing/icing-search-engine_delete_test.cc @@ -0,0 +1,768 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/icing-search-engine.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/schema-builder.h" +#include "icing/testing/common-matchers.h" 
+#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::IsEmpty; +using ::testing::Return; +using ::testing::SizeIs; +using ::testing::StrEq; +using ::testing::UnorderedElementsAre; + +// For mocking purpose, we allow tests to provide a custom Filesystem. +class TestIcingSearchEngine : public IcingSearchEngine { + public: + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr filesystem, + std::unique_ptr icing_filesystem, + std::unique_ptr clock, + std::unique_ptr jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This test is meant to cover all tests relating to IcingSearchEngine::Delete*. +class IcingSearchEngineDeleteTest : public testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD. 
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +// Non-zero value so we don't override it to be the current time +constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +SchemaProto CreateMessageSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +SchemaProto CreateEmailSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +ScoringSpecProto GetDefaultScoringSpec() { + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + return scoring_spec; +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteBySchemaType) { + SchemaProto schema; + // Add an email type + auto type = schema.add_types(); + type->set_schema_type("email"); + auto property = type->add_properties(); + property->set_property_name("subject"); + property->set_data_type(PropertyConfigProto::DataType::STRING); 
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::EXACT_ONLY); + property->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + // Add a message type + type = schema.add_types(); + type->set_schema_type("message"); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::EXACT_ONLY); + property->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("message") + .AddStringProperty("body", "message body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("email") + .AddStringProperty("subject", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), +
EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete the first type. The first doc should be irretrievable. The + // second should still be present. + DeleteBySchemaTypeResultProto result_proto = + icing.DeleteBySchemaType("message"); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + DeleteStatsProto exp_stats; + exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE); + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(1); + EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace1, uri1) not found."); + expected_get_result_proto.clear_document(); + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Search for "message", only document2 should show up. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("message"); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteSchemaTypeByQuery) { + SchemaProto schema = CreateMessageSchema(); + // Add an email type + SchemaProto tmp = CreateEmailSchema(); + *schema.add_types() = tmp.types(0); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema(schema.types(0).schema_type()) + .AddStringProperty("body", "message body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema(schema.types(1).schema_type()) + .AddStringProperty("subject", "subject subject2") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + 
*expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete the first type. The first doc should be irretrievable. The + // second should still be present. + SearchSpecProto search_spec; + search_spec.add_schema_type_filters(schema.types(0).schema_type()); + EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk()); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace1, uri1) not found."); + expected_get_result_proto.clear_document(); + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + search_spec = SearchSpecProto::default_instance(); + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteByNamespace) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message 
body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace3", "uri3") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document3; + EXPECT_THAT( + icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete namespace1. Document1 and document2 should be irretrievable. + // Document3 should still be present. 
+ DeleteByNamespaceResultProto result_proto = + icing.DeleteByNamespace("namespace1"); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + DeleteStatsProto exp_stats; + exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE); + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(2); + EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace1, uri1) not found."); + expected_get_result_proto.clear_document(); + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace1, uri2) not found."); + expected_get_result_proto.clear_document(); + EXPECT_THAT( + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document3; + EXPECT_THAT( + icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Search for "message", only document3 should show up. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("message"); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteNamespaceByQuery) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete the first namespace. 
The first doc should be irretrievable. The + // second should still be present. + SearchSpecProto search_spec; + search_spec.add_namespace_filters("namespace1"); + EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk()); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace1, uri1) not found."); + expected_get_result_proto.clear_document(); + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + search_spec = SearchSpecProto::default_instance(); + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteByQuery) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete all docs containing 'body1'. The first doc should be irretrievable. + // The second should still be present. 
+ SearchSpecProto search_spec; + search_spec.set_query("body1"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + DeleteByQueryStatsProto exp_stats; + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(1); + exp_stats.set_query_length(search_spec.query().length()); + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(0); + exp_stats.set_num_schema_types_filtered(0); + exp_stats.set_parse_query_latency_ms(7); + exp_stats.set_document_removal_latency_ms(7); + EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace1, uri1) not found."); + expected_get_result_proto.clear_document(); + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + search_spec = SearchSpecProto::default_instance(); + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + 
expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryReturnInfo) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace2", "uri3") + .SetSchema("Message") + .AddStringProperty("body", "message body3") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document3; + EXPECT_THAT( + icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()), + 
EqualsProto(expected_get_result_proto)); + + // Delete all docs to test the information is correctly grouped. + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + DeleteByQueryResultProto result_proto = + icing.DeleteByQuery(search_spec, true); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + DeleteByQueryStatsProto exp_stats; + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(3); + exp_stats.set_query_length(search_spec.query().length()); + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(0); + exp_stats.set_num_schema_types_filtered(0); + exp_stats.set_parse_query_latency_ms(7); + exp_stats.set_document_removal_latency_ms(7); + EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); + + // Check that DeleteByQuery can return information for deleted documents. + DeleteByQueryResultProto::DocumentGroupInfo info1, info2; + info1.set_namespace_("namespace1"); + info1.set_schema("Message"); + info1.add_uris("uri1"); + info2.set_namespace_("namespace2"); + info2.set_schema("Message"); + info2.add_uris("uri3"); + info2.add_uris("uri2"); + EXPECT_THAT(result_proto.deleted_documents(), + UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2))); + + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); +} + +TEST_F(IcingSearchEngineDeleteTest, DeleteByQueryNotFound) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body1") + 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete all docs containing 'foo', which should be none of them. Both docs + // should still be present. 
+ SearchSpecProto search_spec; + search_spec.set_query("foo"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + search_spec = SearchSpecProto::default_instance(); + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc new file mode 100644 index 0000000..6ff21fb --- /dev/null +++ b/icing/icing-search-engine_initialization_test.cc @@ -0,0 +1,1920 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/icing-search-engine.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/store/document-log-creator.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace 
lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::_; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Return; +using ::testing::SizeIs; + +constexpr std::string_view kIpsumText = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis " + "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida " + "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam " + "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo " + "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, " + "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula " + "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et " + "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, " + "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis " + "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. " + "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. " + "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur " + "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh " + "placerat semper."; + +PortableFileBackedProtoLog::Header ReadDocumentLogHeader( + Filesystem filesystem, const std::string& file_path) { + PortableFileBackedProtoLog::Header header; + filesystem.PRead(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog::Header), + /*offset=*/0); + return header; +} + +void WriteDocumentLogHeader( + Filesystem filesystem, const std::string& file_path, + PortableFileBackedProtoLog::Header& header) { + filesystem.Write(file_path.c_str(), &header, + sizeof(PortableFileBackedProtoLog::Header)); +} + +// For mocking purpose, we allow tests to provide a custom Filesystem. 
+class TestIcingSearchEngine : public IcingSearchEngine { + public: + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr filesystem, + std::unique_ptr icing_filesystem, + std::unique_ptr clock, + std::unique_ptr jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This test is meant to cover all tests relating to +// IcingSearchEngine::Initialize. +class IcingSearchEngineInitializationTest : public testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD. 
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +// Non-zero value so we don't override it to be the current time +constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; + +std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; } + +std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; } + +std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; } + +std::string GetHeaderFilename() { + return GetTestBaseDir() + "/icing_search_engine_header"; +} + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { + return DocumentBuilder() + .SetKey(std::move(name_space), std::move(uri)) + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); +} + +DocumentProto CreateEmailDocument(const std::string& name_space, + const std::string& uri, int score, + const std::string& subject_content, + const std::string& body_content) { + return DocumentBuilder() + .SetKey(name_space, uri) + .SetSchema("Email") + .SetScore(score) + .AddStringProperty("subject", subject_content) + .AddStringProperty("body", body_content) + .Build(); +} + +SchemaProto CreateMessageSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + 
.SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +SchemaProto CreateEmailSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +ScoringSpecProto GetDefaultScoringSpec() { + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + return scoring_spec; +} + +TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializingAgainSavesNonPersistedData) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; + + ASSERT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT( + 
icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineInitializationTest, + MaxIndexMergeSizeReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(std::numeric_limits::max()); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, + NegativeMergeSizeReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(-1); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, + ZeroMergeSizeReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(0); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + // One is fine, if a bit weird. It just means that the lite index will be + // smaller and will request a merge any time content is added to it. 
+ options.set_index_merge_size(1); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineInitializationTest, + NegativeMaxTokenLenReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_max_token_length(-1); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, + ZeroMaxTokenLenReturnsInvalidArgument) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_max_token_length(0); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) { + auto mock_filesystem = std::make_unique(); + // This fails DocumentStore::Create() + ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_)) + .WillByDefault(Return(false)); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), GetTestJniCache()); + + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), + ProtoStatusIs(StatusProto::INTERNAL)); + EXPECT_THAT(initialize_result_proto.status().message(), + HasSubstr("Could not create directory")); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitMarkerFilePreviousFailuresAtThreshold) { + Filesystem filesystem; + DocumentProto email1 = + CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); + email1.set_creation_timestamp_ms(10000); + DocumentProto email2 = + CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); + email2.set_creation_timestamp_ms(10000); + + { + // Create an index with a few documents. 
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + } + + // Write an init marker file with 5 previously failed attempts. + std::string marker_filepath = GetTestBaseDir() + "/init_marker"; + + { + ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); + int network_init_attempts = GHostToNetworkL(5); + // Write the updated number of attempts before we get started. + ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, + &network_init_attempts, + sizeof(network_init_attempts))); + ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); + } + + { + // Create the index again and verify that initialization succeeds and no + // data is thrown out. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(5)); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(email1)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(email2)); + } + + // The successful init should have thrown out the marker file. 
+ ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitMarkerFilePreviousFailuresBeyondThreshold) { + Filesystem filesystem; + DocumentProto email1 = + CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); + DocumentProto email2 = + CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); + + { + // Create an index with a few documents. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + } + + // Write an init marker file with 6 previously failed attempts. + std::string marker_filepath = GetTestBaseDir() + "/init_marker"; + + { + ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); + int network_init_attempts = GHostToNetworkL(6); + // Write the updated number of attempts before we get started. + ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, + &network_init_attempts, + sizeof(network_init_attempts))); + ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); + } + + { + // Create the index again and verify that initialization succeeds and all + // data is thrown out. 
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), + ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(6)); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + } + + // The successful init should have thrown out the marker file. + ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); +} + +TEST_F(IcingSearchEngineInitializationTest, + SuccessiveInitFailuresIncrementsInitMarker) { + Filesystem filesystem; + DocumentProto email1 = + CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); + DocumentProto email2 = + CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); + + { + // 1. Create an index with a few documents. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoIsOk()); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + } + + { + // 2. Create an index that will encounter an IO failure when trying to + // create the document log. 
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + + auto mock_filesystem = std::make_unique(); + std::string document_log_filepath = + icing_options.base_dir() + "/document_dir/document_log_v1"; + auto get_filesize_lambda = [this, + &document_log_filepath](const char* filename) { + if (strncmp(document_log_filepath.c_str(), filename, + document_log_filepath.length()) == 0) { + return Filesystem::kBadFileSize; + } + return this->filesystem()->GetFileSize(filename); + }; + ON_CALL(*mock_filesystem, GetFileSize(A())) + .WillByDefault(get_filesize_lambda); + + TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), + GetTestJniCache()); + + // Fail to initialize six times in a row. + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(0)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(1)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(2)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(3)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(4)); + + init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(5)); + } + + { + // 3. 
Create the index again and verify that initialization succeeds and all + // data is thrown out. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + ASSERT_THAT(init_result.status(), + ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), + Eq(6)); + + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + } + + // The successful init should have thrown out the marker file. + std::string marker_filepath = GetTestBaseDir() + "/init_marker"; + ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); +} + +TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) { + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + CreateMessageDocument("namespace", "uri"); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + CreateMessageDocument("namespace", "uri"); + + { + // Basic initialization/setup + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + 
EqualsProto(expected_get_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str())); + + // We should be able to recover from this and access all our previous data + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Checks that DocumentLog is still ok + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Checks that the index is still ok so we can search over it + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Checks that Schema is still since it'll be needed to validate the document + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); +} + +TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) { + { + // Basic initialization/setup + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + CreateMessageDocument("namespace", "uri"); + + EXPECT_THAT( + icing.Get("namespace", "uri", 
GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + const std::string schema_file = + absl_ports::StrCat(GetSchemaDir(), "/schema.pb"); + const std::string corrupt_data = "1234"; + EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(), + corrupt_data.size())); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INTERNAL)); +} + +TEST_F(IcingSearchEngineInitializationTest, + UnableToRecoverFromCorruptDocumentLog) { + { + // Basic initialization/setup + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + CreateMessageDocument("namespace", "uri"); + + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + const std::string corrupt_data = "1234"; + EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(), + corrupt_data.data(), corrupt_data.size())); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::INTERNAL)); +} + +TEST_F(IcingSearchEngineInitializationTest, + RecoverFromInconsistentSchemaStore) { + DocumentProto document1 = 
CreateMessageDocument("namespace", "uri1"); + DocumentProto document2_with_additional_property = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("additional", "content") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + { + // Initializes folder and schema + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaProto schema; + auto type = schema.add_types(); + type->set_schema_type("Message"); + + auto property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + property->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + property->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + property = type->add_properties(); + property->set_property_name("additional"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document2_with_additional_property).status(), + ProtoIsOk()); + + // Won't get us anything because "additional" isn't marked as an indexed + // property in the schema + SearchSpecProto search_spec; + search_spec.set_query("additional:content"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + 
ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + { + // This schema will change the SchemaTypeIds from the previous schema_ + // (since SchemaTypeIds are assigned based on order of the types, and this + // new schema changes the ordering of previous types) + SchemaProto new_schema; + auto type = new_schema.add_types(); + type->set_schema_type("Email"); + + type = new_schema.add_types(); + type->set_schema_type("Message"); + + // Adding a new property changes the SectionIds (since SectionIds are + // assigned based on alphabetical order of indexed sections, marking + // "additional" as an indexed property will push the "body" property to a + // different SectionId) + auto property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + property->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + property->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + property = type->add_properties(); + property->set_property_name("additional"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + property->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + // Write the marker file + std::string marker_filepath = + absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); + ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); + ASSERT_TRUE(sfd.is_valid()); + + // Write the new schema + FakeClock fake_clock; + 
ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr schema_store, + SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); + ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); + } // Will persist new schema + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // We can insert an Email document since we kept the new schema + DocumentProto email_document = + DocumentBuilder() + .SetKey("namespace", "email_uri") + .SetSchema("Email") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = email_document; + + EXPECT_THAT(icing.Get("namespace", "email_uri", + GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + SearchSpecProto search_spec; + + // The section restrict will ensure we are using the correct, updated + // SectionId in the Index + search_spec.set_query("additional:content"); + + // Schema type filter will ensure we're using the correct, updated + // SchemaTypeId in the DocumentStore + search_spec.add_schema_type_filters("Message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2_with_additional_property; + + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineInitializationTest, + RecoverFromInconsistentDocumentStore) { + DocumentProto document1 = 
CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + + { + // Initializes folder and schema, index one document + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + } // This should shut down IcingSearchEngine and persist anything it needs to + + { + FakeClock fake_clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr schema_store, + SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); + ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema())); + + // Puts a second document into DocumentStore but doesn't index it. + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock, + schema_store.get())); + std::unique_ptr document_store = + std::move(create_result.document_store); + + ICING_EXPECT_OK(document_store->Put(document2)); + } + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + // Index Restoration should be triggered here and document2 should be + // indexed. 
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + + // DocumentStore kept the additional document + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // We indexed the additional document + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineInitializationTest, RecoverFromInconsistentIndex) { + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + CreateMessageDocument("namespace", "uri"); + + { + // Initializes folder and schema, index one document + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + 
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + // Pretend we lost the entire index + EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively( + absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str())); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Check that our index is ok by searching over the restored index + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) { + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + CreateMessageDocument("namespace", "uri"); + + { + // Initializes folder and schema, index one document + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + SearchResultProto search_result_proto = + icing.Search(search_spec, 
GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + // Pretend index is corrupted + const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb"; + ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str())); + ASSERT_TRUE(fd.is_valid()); + ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Check that our index is ok by searching over the restored index + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + + SchemaProto email_schema = CreateMessageSchema(); + EXPECT_THAT(icing.SetSchema(email_schema).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.GetSchema().status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + + DocumentProto doc = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing.Put(doc).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing + .Get(doc.namespace_(), doc.uri(), + GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + 
EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type()) + .status() + .code(), + Eq(StatusProto::FAILED_PRECONDITION)); + + SearchSpecProto search_spec = SearchSpecProto::default_instance(); + ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance(); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + constexpr int kSomePageToken = 12; + EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash. + + EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.Optimize().status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); +} + +TEST_F(IcingSearchEngineInitializationTest, RestoreIndex) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", kIpsumText) + .Build(); + // 1. Create an index with a LiteIndex that will only allow one document + // before needing a merge. + { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add two documents. These should get merged into the main index. + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = DocumentBuilder(document).SetUri("fake_type/1").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + // Add one document. This one should remain in the lite index. 
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Delete the index file to trigger RestoreIndexIfNeeded. + std::string idx_subdir = GetIndexDir() + "/idx"; + filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); + + // 3. Create the index again. This should trigger index restoration. + { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("consectetur"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + SearchResultProto results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.next_page_token(), Eq(0)); + // All documents should be retrievable. + ASSERT_THAT(results.results(), SizeIs(3)); + EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2")); + EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1")); + EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0")); + } +} + +TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseLiteIndex) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", kIpsumText) + .Build(); + // 1. Create an index with a LiteIndex that will only allow one document + // before needing a merge. + { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add two documents. 
These should get merged into the main index. + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = DocumentBuilder(document).SetUri("fake_type/1").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + // Add one document. This one should remain in the lite index. + document = DocumentBuilder(document).SetUri("fake_type/2").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Delete the last document from the document log + { + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + filesystem()->DeleteFile(document_log_file.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + auto create_result, + PortableFileBackedProtoLog::Create( + filesystem(), document_log_file.c_str(), + PortableFileBackedProtoLog::Options( + /*compress_in=*/true))); + std::unique_ptr> document_log = + std::move(create_result.proto_log); + + document = DocumentBuilder(document).SetUri("fake_type/0").Build(); + DocumentWrapper wrapper; + *wrapper.mutable_document() = document; + ASSERT_THAT(document_log->WriteProto(wrapper), IsOk()); + + document = DocumentBuilder(document).SetUri("fake_type/1").Build(); + *wrapper.mutable_document() = document; + ASSERT_THAT(document_log->WriteProto(wrapper), IsOk()); + } + + // 3. Create the index again. This should throw out the lite index and trigger + // index restoration which will only restore the two documents in the main + // index. 
+ { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("consectetur"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + SearchResultProto results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.next_page_token(), Eq(0)); + // Only the documents that were in the main index should be retrievable. + ASSERT_THAT(results.results(), SizeIs(2)); + EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1")); + EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0")); + } +} + +TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIndex) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", kIpsumText) + .Build(); + // 1. Create an index with a LiteIndex that will only allow one document + // before needing a merge. + { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add two documents. These should get merged into the main index. + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = DocumentBuilder(document).SetUri("fake_type/1").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + // Add one document. This one should remain in the lite index. + document = DocumentBuilder(document).SetUri("fake_type/2").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. 
Delete the last two documents from the document log. + { + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + filesystem()->DeleteFile(document_log_file.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + auto create_result, + PortableFileBackedProtoLog::Create( + filesystem(), document_log_file.c_str(), + PortableFileBackedProtoLog::Options( + /*compress_in=*/true))); + std::unique_ptr> document_log = + std::move(create_result.proto_log); + + document = DocumentBuilder(document).SetUri("fake_type/0").Build(); + DocumentWrapper wrapper; + *wrapper.mutable_document() = document; + ASSERT_THAT(document_log->WriteProto(wrapper), IsOk()); + } + + // 3. Create the index again. This should throw out the lite and main index + // and trigger index restoration. + { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("consectetur"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + SearchResultProto results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.next_page_token(), Eq(0)); + // Only the first document should be retrievable. + ASSERT_THAT(results.results(), SizeIs(1)); + EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0")); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) { + // 1. Create an index with a single document in it that has no indexed + // content. 
+ { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Set a schema for a single type that has no indexed properties. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("unindexedField") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // Add a document that contains no indexed content. + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("unindexedField", + "Don't you dare search over this!") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Create the index again. This should NOT trigger a recovery of any kind. + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) { + // 1. Create an index with a single document in it that has no valid indexed + // tokens in its content. + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Set the message schema; only the document content lacks valid tokens. 
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add a document that contains no valid indexed content - just punctuation. + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "?...!") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Create the index again. This should NOT trigger a recovery of any kind. + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogFunctionLatency) { + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10)); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogNumberOfDocuments) { + DocumentProto document1 = DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("icing", "fake_type/2") + .SetSchema("Message") + 
.AddStringProperty("body", "message body") + .Build(); + + { + // Initialize and put a document. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), + Eq(0)); + + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), + Eq(1)); + + // Put another document. + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), + Eq(2)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) { + // Even though the fake timer will return 10, all the latency numbers related + // to recovery / restoration should be 0 during the first-time initialization. 
+ auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCausePartialDataLoss) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + + { + // Initialize and put a document. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + { + // Append a non-checksummed document. This will mess up the checksum of the + // proto log, forcing it to rewind and later return a DATA_LOSS error. 
+ const std::string serialized_document = document.SerializeAsString(); + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + + int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str()); + filesystem()->PWrite(document_log_file.c_str(), file_size, + serialized_document.data(), + serialized_document.size()); + } + + { + // Document store will rewind to previous checkpoint. The cause should be + // DATA_LOSS and the data status should be PARTIAL_LOSS. + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(10)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::PARTIAL_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCauseCompleteDataLoss) { + DocumentProto document1 = DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema("Message") + .AddStringProperty("body", "message 
body") + .Build(); + + const std::string document_log_file = absl_ports::StrCat( + GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); + int64_t corruptible_offset; + + { + // Initialize and put a document. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // There's some space at the beginning of the file (e.g. header, kmagic, + // etc) that is necessary to initialize the FileBackedProtoLog. We can't + // corrupt that region, so we need to figure out the offset at which + // documents will be written to - which is the file size after + // initialization. + corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str()); + + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + } + + { + // "Corrupt" the content written in the log. Make the corrupt document + // smaller than our original one so we don't accidentally write past our + // file. + DocumentProto document = + DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); + std::string serialized_document = document.SerializeAsString(); + ASSERT_TRUE(filesystem()->PWrite( + document_log_file.c_str(), corruptible_offset, + serialized_document.data(), serialized_document.size())); + + PortableFileBackedProtoLog::Header header = + ReadDocumentLogHeader(*filesystem(), document_log_file); + + // Set dirty bit to true to reflect that something changed in the log. + header.SetDirtyFlag(true); + header.SetHeaderChecksum(header.CalculateHeaderChecksum()); + + WriteDocumentLogHeader(*filesystem(), document_log_file, header); + } + + { + // Document store will completely rewind. The cause should be DATA_LOSS and + // the data status should be COMPLETE_LOSS. 
+ auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(10)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::COMPLETE_LOSS)); + // The complete rewind of ground truth causes us to clear the index, but + // that's not considered a restoration. + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + { + // Initialize and put a document. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + { + // Delete the index file to trigger RestoreIndexIfNeeded. 
+ std::string idx_subdir = GetIndexDir() + "/idx"; + filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); + } + + { + // Index is empty but ground truth is not. Index should be restored due to + // the inconsistency. + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(10)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + { + // Initialize and put one document. 
+ IcingSearchEngine icing(options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + { + // Simulate a schema change where power is lost after the schema is written. + SchemaProto new_schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + // Write the marker file + std::string marker_filepath = + absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); + ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); + ASSERT_TRUE(sfd.is_valid()); + + // Write the new schema + FakeClock fake_clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr schema_store, + SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); + ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); + } + + { + // Both document store and index should be recovered: schema out of sync. 
+ auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(10)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(10)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); + } + + { + // No recovery should be needed. 
+ auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCauseIndexIOError) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + { + // Initialize and put one document. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // lambda to fail OpenForWrite on lite index hit buffer once. 
+ bool has_failed_already = false; + auto open_write_lambda = [this, &has_failed_already](const char* filename) { + std::string lite_index_buffer_file_path = + absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb"); + std::string filename_string(filename); + if (!has_failed_already && filename_string == lite_index_buffer_file_path) { + has_failed_already = true; + return -1; + } + return this->filesystem()->OpenForWrite(filename); + }; + + auto mock_icing_filesystem = std::make_unique(); + // This fails Index::Create() once. + ON_CALL(*mock_icing_filesystem, OpenForWrite) + .WillByDefault(open_write_lambda); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::move(mock_icing_filesystem), + std::move(fake_clock), GetTestJniCache()); + + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_latency_ms(), + Eq(10)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCauseDocStoreIOError) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", 
"fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + { + // Initialize and put one document. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // lambda to fail Read on document store header once. + bool has_failed_already = false; + auto read_lambda = [this, &has_failed_already](const char* filename, + void* buf, size_t buf_size) { + std::string document_store_header_file_path = + absl_ports::StrCat(GetDocumentDir(), "/document_store_header"); + std::string filename_string(filename); + if (!has_failed_already && + filename_string == document_store_header_file_path) { + has_failed_already = true; + return false; + } + return this->filesystem()->Read(filename, buf, buf_size); + }; + + auto mock_filesystem = std::make_unique(); + // This fails DocumentStore::InitializeDerivedFiles() once. 
+ ON_CALL(*mock_filesystem, Read(A(), _, _)) + .WillByDefault(read_lambda); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::move(mock_filesystem), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(10)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(0)); +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogRecoveryCauseSchemaStoreIOError) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + } + + { + // Delete the schema store header file to trigger an I/O error. 
+ std::string schema_store_header_file_path = + GetSchemaDir() + "/schema_store_header"; + filesystem()->DeleteFile(schema_store_header_file_path.c_str()); + } + + { + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .schema_store_recovery_latency_ms(), + Eq(10)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .document_store_recovery_latency_ms(), + Eq(0)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT( + initialize_result_proto.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(initialize_result_proto.initialize_stats() + .index_restoration_latency_ms(), + Eq(0)); + } +} + +TEST_F(IcingSearchEngineInitializationTest, + InitializeShouldLogNumberOfSchemaTypes) { + { + // Initialize an empty storage. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + // There should be 0 schema types. + EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), + Eq(0)); + + // Set a schema with one type config. 
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + // There should be 1 schema type. + EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), + Eq(1)); + + // Create and set a schema with two type configs: Email and Message. + SchemaProto schema = CreateEmailSchema(); + + auto type = schema.add_types(); + type->set_schema_type("Message"); + auto body = type->add_properties(); + body->set_property_name("body"); + body->set_data_type(PropertyConfigProto::DataType::STRING); + body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + body->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + body->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto initialize_result_proto = icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), + Eq(2)); + } +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc new file mode 100644 index 0000000..da02c4a --- /dev/null +++ b/icing/icing-search-engine_optimize_test.cc @@ -0,0 +1,974 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/icing-search-engine.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/schema-builder.h" +#include "icing/store/document-log-creator.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Gt; +using ::testing::HasSubstr; 
+using ::testing::Lt; +using ::testing::Return; + +// For mocking purposes, we allow tests to provide a custom Filesystem. +class TestIcingSearchEngine : public IcingSearchEngine { + public: + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr filesystem, + std::unique_ptr icing_filesystem, + std::unique_ptr clock, + std::unique_ptr jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This fixture is meant to cover all tests relating to +// IcingSearchEngine::Optimize. +class IcingSearchEngineOptimizeTest : public testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD. 
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +// Non-zero value so we don't override it to be the current time +constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { + return DocumentBuilder() + .SetKey(std::move(name_space), std::move(uri)) + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); +} + +SchemaProto CreateMessageSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +ScoringSpecProto GetDefaultScoringSpec() { + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + return scoring_spec; +} + +TEST_F(IcingSearchEngineOptimizeTest, + AllPageTokensShouldBeInvalidatedAfterOptimization) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + 
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + // Searches and gets the first page, 1 result + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); + uint64_t next_page_token = search_result_proto.next_page_token(); + // The token is a random number, so we don't verify its exact value + expected_search_result_proto.set_next_page_token(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + // Now document1 is still to be fetched. 
+ + OptimizeResultProto optimize_result_proto; + optimize_result_proto.mutable_status()->set_code(StatusProto::OK); + optimize_result_proto.mutable_status()->set_message(""); + OptimizeResultProto actual_result = icing.Optimize(); + actual_result.clear_optimize_stats(); + ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto)); + + // Tries to fetch the second page, no results since all tokens have been + // invalidated during Optimize() + expected_search_result_proto.clear_results(); + expected_search_result_proto.clear_next_page_token(); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, OptimizationShouldRemoveDeletedDocs) { + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri1) not found."); + { + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // Deletes document1 + ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); + const std::string document_log_path = + icing_options.base_dir() + "/document_dir/" + + DocumentLogCreator::GetDocumentLogFilename(); + int64_t document_log_size_before = + filesystem()->GetFileSize(document_log_path.c_str()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + int64_t document_log_size_after = + filesystem()->GetFileSize(document_log_path.c_str()); + + // Validates that document can't be found right after Optimize() + EXPECT_THAT( + 
icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + // Validates that document is actually removed from document log + EXPECT_THAT(document_log_size_after, Lt(document_log_size_before)); + } // Destroys IcingSearchEngine to make sure nothing is cached. + + IcingSearchEngine icing(icing_options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, + OptimizationShouldDeleteTemporaryDirectory) { + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create a tmp dir that will be used in Optimize() to swap files, + // this validates that any tmp dirs will be deleted before using. 
+ const std::string tmp_dir = + icing_options.base_dir() + "/document_dir_optimize_tmp"; + + const std::string tmp_file = tmp_dir + "/file"; + ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str())); + ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str())); + ASSERT_TRUE(fd.is_valid()); + ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); + fd.reset(); + + EXPECT_THAT(icing.Optimize().status(), ProtoIsOk()); + + EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str())); + EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str())); +} + +TEST_F(IcingSearchEngineOptimizeTest, GetOptimizeInfoHasCorrectStats) { + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(500) + .Build(); + + { + auto fake_clock = std::make_unique(); + fake_clock->SetSystemTimeMilliseconds(1000); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Just initialized, nothing is optimizable yet. + GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // Only have active documents, nothing is optimizable yet. 
+ optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + + // Deletes document1 + ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); + + optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + int64_t first_estimated_optimizable_bytes = + optimize_info.estimated_optimizable_bytes(); + + // Add a second document, but it'll be expired since the time (1000) is + // greater than the document's creation timestamp (100) + the document's ttl + // (500) + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), + Gt(first_estimated_optimizable_bytes)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); + + // Optimize + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + } + + { + // Recreate with new time + auto fake_clock = std::make_unique(); + fake_clock->SetSystemTimeMilliseconds(5000); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Nothing is optimizable now that everything has been optimized away. 
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); + EXPECT_THAT(optimize_info.status(), ProtoIsOk()); + EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); + EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); + EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000)); + } +} + +TEST_F(IcingSearchEngineOptimizeTest, GetAndPutShouldWorkAfterOptimization) { + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); + DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + + // Validates that Get() and Put() are good right after Optimize() + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + *expected_get_result_proto.mutable_document() = document3; + EXPECT_THAT( + icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + 
EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk()); + } // Destroys IcingSearchEngine to make sure nothing is cached. + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + *expected_get_result_proto.mutable_document() = document3; + EXPECT_THAT( + icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + *expected_get_result_proto.mutable_document() = document4; + EXPECT_THAT( + icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineOptimizeTest, + GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) { + DocumentProto empty_document1 = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto empty_document2 = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto empty_document3 = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetSchema("Message") + .AddStringProperty("body", "") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + 
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + + // Validates that Get() and Put() are good right after Optimize() + *expected_get_result_proto.mutable_document() = empty_document1; + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineOptimizeTest, DeleteShouldWorkAfterOptimization) { + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + + // Validates that Delete() works right after Optimize() + EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code( + StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri1) not found."); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + 
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_get_result_proto.mutable_status()->clear_message(); + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + } // Destroys IcingSearchEngine to make sure nothing is cached. + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri1) not found."); + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri2) not found."); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, OptimizationFailureUninitializesIcing) { + // Setup filesystem to fail + auto mock_filesystem = std::make_unique(); + bool just_swapped_files = false; + auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) { + if (just_swapped_files) { + // We should fail the first call immediately after swapping files. 
+ just_swapped_files = false; + return false; + } + return filesystem()->CreateDirectoryRecursively(dir_name); + }; + ON_CALL(*mock_filesystem, CreateDirectoryRecursively) + .WillByDefault(create_dir_lambda); + + auto swap_lambda = [&just_swapped_files](const char* first_dir, + const char* second_dir) { + just_swapped_files = true; + return false; + }; + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), + HasSubstr("document_dir"))) + .WillByDefault(swap_lambda); + TestIcingSearchEngine icing(options, std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // The mocks should cause an unrecoverable error during Optimize - returning + // INTERNAL. + ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL)); + + // Ordinary operations should fail safely. + SchemaProto simple_schema; + auto type = simple_schema.add_types(); + type->set_schema_type("type0"); + auto property = type->add_properties(); + property->set_property_name("prop0"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + DocumentProto simple_doc = DocumentBuilder() + .SetKey("namespace0", "uri0") + .SetSchema("type0") + .AddStringProperty("prop0", "foo") + .Build(); + + SearchSpecProto search_spec; + search_spec.set_query("foo"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + ResultSpecProto result_spec; + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + + EXPECT_THAT(icing.SetSchema(simple_schema).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.Put(simple_doc).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing + .Get(simple_doc.namespace_(), simple_doc.uri(), 
+ GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + + // Reset should get icing back to a safe (empty) and working state. + EXPECT_THAT(icing.Reset().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk()); + EXPECT_THAT(icing + .Get(simple_doc.namespace_(), simple_doc.uri(), + GetResultSpecProto::default_instance()) + .status(), + ProtoIsOk()); + EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), + ProtoIsOk()); +} + +TEST_F(IcingSearchEngineOptimizeTest, SetSchemaShouldWorkAfterOptimization) { + // Creates 3 test schemas + SchemaProto schema1 = SchemaProto(CreateMessageSchema()); + + SchemaProto schema2 = SchemaProto(schema1); + auto new_property2 = schema2.mutable_types(0)->add_properties(); + new_property2->set_property_name("property2"); + new_property2->set_data_type(PropertyConfigProto::DataType::STRING); + new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + new_property2->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + new_property2->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + SchemaProto schema3 = SchemaProto(schema2); + auto new_property3 = schema3.mutable_types(0)->add_properties(); + new_property3->set_property_name("property3"); + new_property3->set_data_type(PropertyConfigProto::DataType::STRING); + new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + new_property3->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + new_property3->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + { + IcingSearchEngine 
icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + + // Validates that SetSchema() works right after Optimize() + EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk()); + } // Destroys IcingSearchEngine to make sure nothing is cached. + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineOptimizeTest, SearchShouldWorkAfterOptimization) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + + // Validates that Search() works right after Optimize() + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + } // Destroys IcingSearchEngine to make sure nothing is cached. 
+ + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, + IcingShouldWorkFineIfOptimizationIsAborted) { + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + { + // Initializes a normal icing to create files needed + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + } + + // Creates a mock filesystem in which DeleteDirectoryRecursively() always + // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and makes + // it return ABORTED_ERROR. + auto mock_filesystem = std::make_unique(); + ON_CALL(*mock_filesystem, + DeleteDirectoryRecursively(HasSubstr("_optimize_tmp"))) + .WillByDefault(Return(false)); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED)); + + // Now optimization is aborted, we verify that document-related functions + // still work as expected. 
+ + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + + EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("m"); + search_spec.set_term_match_type(TermMatchType::PREFIX); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, + OptimizationShouldRecoverIfFileDirectoriesAreMissing) { + // Creates a mock filesystem in which SwapFiles() always fails and deletes the + // directories. This will fail IcingSearchEngine::OptimizeDocumentStore(). 
+ auto mock_filesystem = std::make_unique(); + ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), + HasSubstr("document_dir"))) + .WillByDefault([this](const char* one, const char* two) { + filesystem()->DeleteDirectoryRecursively(one); + filesystem()->DeleteDirectoryRecursively(two); + return false; + }); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + + // Optimize() fails due to filesystem error + OptimizeResultProto result = icing.Optimize(); + EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + // Should rebuild the index for data loss. + EXPECT_THAT(result.optimize_stats().index_restoration_mode(), + Eq(OptimizeStatsProto::FULL_INDEX_REBUILD)); + + // Document is not found because original file directory is missing + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri) not found."); + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + DocumentProto new_document = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "new body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("m"); + search_spec.set_term_match_type(TermMatchType::PREFIX); + + SearchResultProto expected_search_result_proto; + 
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + // Searching old content returns nothing because original file directory is + // missing + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + search_spec.set_query("n"); + + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + new_document; + + // Searching new content returns the new document + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, + OptimizationShouldRecoverIfDataFilesAreMissing) { + // Creates a mock filesystem in which SwapFiles() always fails and empties the + // directories. This will fail IcingSearchEngine::OptimizeDocumentStore(). 
+ auto mock_filesystem = std::make_unique(); + ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), + HasSubstr("document_dir"))) + .WillByDefault([this](const char* one, const char* two) { + filesystem()->DeleteDirectoryRecursively(one); + filesystem()->CreateDirectoryRecursively(one); + filesystem()->DeleteDirectoryRecursively(two); + filesystem()->CreateDirectoryRecursively(two); + return false; + }); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + + // Optimize() fails due to filesystem error + OptimizeResultProto result = icing.Optimize(); + EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + // Should rebuild the index for data loss. 
+ EXPECT_THAT(result.optimize_stats().index_restoration_mode(), + Eq(OptimizeStatsProto::FULL_INDEX_REBUILD)); + + // Document is not found because original files are missing + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri) not found."); + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + DocumentProto new_document = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "new body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("m"); + search_spec.set_term_match_type(TermMatchType::PREFIX); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + // Searching old content returns nothing because original files are missing + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + search_spec.set_query("n"); + + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + new_document; + + // Searching new content returns the new document + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineOptimizeTest, OptimizeStatsProtoTest) { + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(5); + fake_clock->SetSystemTimeMilliseconds(10000); + 
auto icing = std::make_unique( + GetDefaultIcingOptions(), std::make_unique(), + std::make_unique(), std::move(fake_clock), + GetTestJniCache()); + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create three documents. + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + document2.set_creation_timestamp_ms(9000); + document2.set_ttl_ms(500); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk()); + + // Delete the first document. + ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(), + ProtoIsOk()); + ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); + + OptimizeStatsProto expected; + expected.set_latency_ms(5); + expected.set_document_store_optimize_latency_ms(5); + expected.set_index_restoration_latency_ms(5); + expected.set_num_original_documents(3); + expected.set_num_deleted_documents(1); + expected.set_num_expired_documents(1); + expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION); + + // Run Optimize + OptimizeResultProto result = icing->Optimize(); + // Depending on how many blocks the documents end up spread across, it's + // possible that Optimize can remove documents without shrinking storage. The + // first Optimize call will also write the OptimizeStatusProto for the first + // time which will take up 1 block. So make sure that before_size is no less + // than after_size - 1 block. 
+ uint32_t page_size = getpagesize(); + EXPECT_THAT(result.optimize_stats().storage_size_before(), + Ge(result.optimize_stats().storage_size_after() - page_size)); + result.mutable_optimize_stats()->clear_storage_size_before(); + result.mutable_optimize_stats()->clear_storage_size_after(); + EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); + + fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(5); + fake_clock->SetSystemTimeMilliseconds(20000); + icing = std::make_unique( + GetDefaultIcingOptions(), std::make_unique(), + std::make_unique(), std::move(fake_clock), + GetTestJniCache()); + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + + expected = OptimizeStatsProto(); + expected.set_latency_ms(5); + expected.set_document_store_optimize_latency_ms(5); + expected.set_index_restoration_latency_ms(5); + expected.set_num_original_documents(1); + expected.set_num_deleted_documents(0); + expected.set_num_expired_documents(0); + expected.set_time_since_last_optimize_ms(10000); + expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION); + + // Run Optimize + result = icing->Optimize(); + EXPECT_THAT(result.optimize_stats().storage_size_before(), + Eq(result.optimize_stats().storage_size_after())); + result.mutable_optimize_stats()->clear_storage_size_before(); + result.mutable_optimize_stats()->clear_storage_size_after(); + EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); + + // Delete the last document. + ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(), + ProtoIsOk()); + + expected = OptimizeStatsProto(); + expected.set_latency_ms(5); + expected.set_document_store_optimize_latency_ms(5); + expected.set_index_restoration_latency_ms(5); + expected.set_num_original_documents(1); + expected.set_num_deleted_documents(1); + expected.set_num_expired_documents(0); + expected.set_time_since_last_optimize_ms(0); + // Should rebuild the index since all documents are removed. 
+ expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD); + + // Run Optimize + result = icing->Optimize(); + EXPECT_THAT(result.optimize_stats().storage_size_before(), + Ge(result.optimize_stats().storage_size_after())); + result.mutable_optimize_stats()->clear_storage_size_before(); + result.mutable_optimize_stats()->clear_storage_size_after(); + EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_put_test.cc b/icing/icing-search-engine_put_test.cc new file mode 100644 index 0000000..ed72f17 --- /dev/null +++ b/icing/icing-search-engine_put_test.cc @@ -0,0 +1,481 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/icing-search-engine.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/schema-builder.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/random-string.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::Eq; +using ::testing::Ge; +using ::testing::HasSubstr; +using ::testing::IsEmpty; +using ::testing::Le; +using ::testing::SizeIs; + +constexpr std::string_view kIpsumText = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis " + "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida " + "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam " + "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo " + "erat, fringilla in nisl a, venenatis gravida metus. 
Phasellus venenatis, " + "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula " + "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et " + "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, " + "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis " + "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. " + "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. " + "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur " + "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh " + "placerat semper."; + +// For mocking purpose, we allow tests to provide a custom Filesystem. +class TestIcingSearchEngine : public IcingSearchEngine { + public: + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr filesystem, + std::unique_ptr icing_filesystem, + std::unique_ptr clock, + std::unique_ptr jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This test is meant to cover all tests relating to IcingSearchEngine::Put. +class IcingSearchEnginePutTest : public testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD. 
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1; + +// Non-zero value so we don't override it to be the current time +constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; + +std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; } + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { + return DocumentBuilder() + .SetKey(std::move(name_space), std::move(uri)) + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); +} + +SchemaProto CreateMessageSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +ScoringSpecProto GetDefaultScoringSpec() { + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + return scoring_spec; +} + +TEST_F(IcingSearchEnginePutTest, MaxTokenLenReturnsOkAndTruncatesTokens) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + // A length of 1 is allowed - even though it would be strange to want + // this. 
+ options.set_max_token_length(1); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // "message" should have been truncated to "m" + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + // The indexed tokens were truncated to length of 1, so "m" will match + search_spec.set_query("m"); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // The query token is also truncated to length of 1, so "me"->"m" matches "m" + search_spec.set_query("me"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // The query token is still truncated to length of 1, so "massage"->"m" + // matches "m" + search_spec.set_query("massage"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEnginePutTest, + MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + // Set token length to max. This is allowed (it just means never to + // truncate tokens). 
However, this does mean that tokens that exceed the + // size of the lexicon will cause indexing to fail. + options.set_max_token_length(std::numeric_limits::max()); + IcingSearchEngine icing(options, GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add a document that just barely fits under the max document limit. + // This will still fail to index because we won't actually have enough + // room in the lexicon to fit this content. + std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p'); + DocumentProto document = + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Message") + .AddStringProperty("body", std::move(enormous_string)) + .Build(); + EXPECT_THAT(icing.Put(document).status(), + ProtoStatusIs(StatusProto::OUT_OF_SPACE)); + + SearchSpecProto search_spec; + search_spec.set_query("p"); + search_spec.set_term_match_type(TermMatchType::PREFIX); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEnginePutTest, PutWithoutSchemaFailedPrecondition) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + DocumentProto document = CreateMessageDocument("namespace", "uri"); + PutResultProto put_result_proto = icing.Put(document); + EXPECT_THAT(put_result_proto.status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set")); +} + +TEST_F(IcingSearchEnginePutTest, IndexingDocMergeFailureResets) { + DocumentProto document = DocumentBuilder() + 
.SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", kIpsumText) + .Build(); + // 1. Create an index with a LiteIndex that will only allow one document + // before needing a merge. + { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + IcingSearchEngine icing(options, GetTestJniCache()); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Add two documents. These should get merged into the main index. + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = DocumentBuilder(document).SetUri("fake_type/1").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + // Add one document. This one should remain in the lite index. + document = DocumentBuilder(document).SetUri("fake_type/2").Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } + + // 2. Delete the index file to trigger RestoreIndexIfNeeded. + std::string idx_subdir = GetIndexDir() + "/idx"; + filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); + + // 3. Setup a mock filesystem to fail to grow the main index once. + bool has_failed_already = false; + auto open_write_lambda = [this, &has_failed_already](const char* filename) { + std::string main_lexicon_suffix = "/main-lexicon.prop.2"; + std::string filename_string(filename); + if (!has_failed_already && + filename_string.length() >= main_lexicon_suffix.length() && + filename_string.substr( + filename_string.length() - main_lexicon_suffix.length(), + main_lexicon_suffix.length()) == main_lexicon_suffix) { + has_failed_already = true; + return -1; + } + return this->filesystem()->OpenForWrite(filename); + }; + auto mock_icing_filesystem = std::make_unique(); + ON_CALL(*mock_icing_filesystem, OpenForWrite) + .WillByDefault(open_write_lambda); + + // 4. Create the index again. This should trigger index restoration.
+ { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_index_merge_size(document.ByteSizeLong()); + TestIcingSearchEngine icing(options, std::make_unique(), + std::move(mock_icing_filesystem), + std::make_unique(), + GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), + ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); + + SearchSpecProto search_spec; + search_spec.set_query("consectetur"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + SearchResultProto results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.next_page_token(), Eq(0)); + // Only the last document that was added should still be retrievable. + ASSERT_THAT(results.results(), SizeIs(1)); + EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2")); + } +} + +TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogFunctionLatency) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + PutResultProto put_result_proto = icing.Put(document); + EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10)); +} + +TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogDocumentStoreStats) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .AddStringProperty("body", "message body") + 
.Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + PutResultProto put_result_proto = icing.Put(document); + EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(), + Eq(10)); + size_t document_size = put_result_proto.put_document_stats().document_size(); + EXPECT_THAT(document_size, Ge(document.ByteSizeLong())); + EXPECT_THAT(document_size, Le(document.ByteSizeLong() + + sizeof(DocumentProto::InternalFields))); +} + +TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexingStats) { + DocumentProto document = DocumentBuilder() + .SetKey("icing", "fake_type/0") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .Build(); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + PutResultProto put_result_proto = icing.Put(document); + EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10)); + // No merge should happen. + EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), + Eq(0)); + // The input document has 2 tokens. 
+ EXPECT_THAT(put_result_proto.put_document_stats() + .tokenization_stats() + .num_tokens_indexed(), + Eq(2)); +} + +TEST_F(IcingSearchEnginePutTest, PutDocumentShouldLogIndexMergeLatency) { + DocumentProto document1 = DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema("Message") + .AddStringProperty("body", kIpsumText) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("icing", "fake_type/2") + .SetSchema("Message") + .AddStringProperty("body", kIpsumText) + .Build(); + + // Create an icing instance with index_merge_size = document1's size. + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + icing_options.set_index_merge_size(document1.ByteSizeLong()); + + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(10); + TestIcingSearchEngine icing(icing_options, std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // Putting document2 should trigger an index merge. + PutResultProto put_result_proto = icing.Put(document2); + EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); + EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), + Eq(10)); +} + +TEST_F(IcingSearchEnginePutTest, PutDocumentIndexFailureDeletion) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Testing has shown that adding ~600,000 terms generated this way will + // fill up the hit buffer. 
+ std::vector terms = GenerateUniqueTerms(600000); + std::string content = absl_ports::StrJoin(terms, " "); + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "foo " + content) + .Build(); + // We failed to add the document to the index fully. This means that we should + // reject the document from Icing entirely. + ASSERT_THAT(icing.Put(document).status(), + ProtoStatusIs(StatusProto::OUT_OF_SPACE)); + + // Make sure that the document isn't searchable. + SearchSpecProto search_spec; + search_spec.set_query("foo"); + search_spec.set_term_match_type(TERM_MATCH_PREFIX); + + SearchResultProto search_results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + ASSERT_THAT(search_results.status(), ProtoIsOk()); + ASSERT_THAT(search_results.results(), IsEmpty()); + + // Make sure that the document isn't retrievable. + GetResultProto get_result = + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()); + ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc new file mode 100644 index 0000000..b369d40 --- /dev/null +++ b/icing/icing-search-engine_schema_test.cc @@ -0,0 +1,1698 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/icing-search-engine.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/schema-builder.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Return; + +// For mocking purpose, we allow tests to provide a custom Filesystem. 
+class TestIcingSearchEngine : public IcingSearchEngine { + public: + // Takes ownership of every injected dependency and forwards it unchanged to + // the IcingSearchEngine constructor, so a test can substitute a mock + // filesystem or a fake clock. + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr<const Filesystem> filesystem, + std::unique_ptr<const IcingFilesystem> icing_filesystem, + std::unique_ptr<Clock> clock, + std::unique_ptr<const JniCache> jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This test is meant to cover all tests relating to +// IcingSearchEngine::GetSchema and IcingSearchEngine::SetSchema. +class IcingSearchEngineSchemaTest : public testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD. 
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +// Non-zero value so we don't override it to be the current time +constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; + +std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; } + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { + return DocumentBuilder() + .SetKey(std::move(name_space), std::move(uri)) + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); +} + +SchemaProto CreateMessageSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +ScoringSpecProto GetDefaultScoringSpec() { + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + return scoring_spec; +} + +TEST_F(IcingSearchEngineSchemaTest, + CircularReferenceCreateSectionManagerReturnsInvalidArgument) { + // Create a type config with a circular reference. 
+ SchemaProto schema; + auto* type = schema.add_types(); + type->set_schema_type("Message"); + + auto* body = type->add_properties(); + body->set_property_name("recipient"); + body->set_schema_type("Person"); + body->set_data_type(PropertyConfigProto::DataType::DOCUMENT); + body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + body->mutable_document_indexing_config()->set_index_nested_properties(true); + + type = schema.add_types(); + type->set_schema_type("Person"); + + body = type->add_properties(); + body->set_property_name("recipient"); + body->set_schema_type("Message"); + body->set_data_type(PropertyConfigProto::DataType::DOCUMENT); + body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + body->mutable_document_indexing_config()->set_index_nested_properties(true); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(schema).status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +// Initialize() should report INTERNAL when the previously written schema file +// cannot be opened for reading. +TEST_F(IcingSearchEngineSchemaTest, FailToReadSchema) { + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + + { + // Successfully initialize and set a schema + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + } + + auto mock_filesystem = std::make_unique<MockFilesystem>(); + + // This fails FileBackedProto::Read() when we try to check the schema we + // had previously set + ON_CALL(*mock_filesystem, + OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb"))) + .WillByDefault(Return(-1)); + + TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem), + std::make_unique<IcingFilesystem>(), + std::make_unique<FakeClock>(), + GetTestJniCache()); + + InitializeResultProto initialize_result_proto = test_icing.Initialize(); + EXPECT_THAT(initialize_result_proto.status(), + 
ProtoStatusIs(StatusProto::INTERNAL)); + EXPECT_THAT(initialize_result_proto.status().message(), + HasSubstr("Unable to open file for read")); +} + +// SetSchema() should report INTERNAL when schema.pb cannot be opened for +// writing. +TEST_F(IcingSearchEngineSchemaTest, FailToWriteSchema) { + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + + auto mock_filesystem = std::make_unique<MockFilesystem>(); + // This fails FileBackedProto::Write() + ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb"))) + .WillByDefault(Return(-1)); + + TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), + std::make_unique<IcingFilesystem>(), + std::make_unique<FakeClock>(), GetTestJniCache()); + + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SetSchemaResultProto set_schema_result_proto = + icing.SetSchema(CreateMessageSchema()); + EXPECT_THAT(set_schema_result_proto.status(), + ProtoStatusIs(StatusProto::INTERNAL)); + EXPECT_THAT(set_schema_result_proto.status().message(), + HasSubstr("Unable to open file for write")); +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleFails) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. Create a schema with an Email type with properties { "title", "body"} + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // 2. 
Add an email document + DocumentProto doc = DocumentBuilder() + .SetKey("emails", "email#1") + .SetSchema("Email") + .AddStringProperty("title", "Hello world.") + .AddStringProperty("body", "Goodnight Moon.") + .Build(); + EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk()); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 3. Set a schema that deletes email. This should fail. + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Message"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT( + icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false) + .status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + + // 4. Try to delete by email type. This should succeed because email wasn't + // deleted in step 3. + EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk()); + } +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaIncompatibleForceOverrideSucceeds) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. 
Create a schema with an Email type with properties { "title", "body"} + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // 2. Add an email document + DocumentProto doc = DocumentBuilder() + .SetKey("emails", "email#1") + .SetSchema("Email") + .AddStringProperty("title", "Hello world.") + .AddStringProperty("body", "Goodnight Moon.") + .Build(); + EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk()); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 3. Set a schema that deletes email with force override. This should + // succeed and delete the email type. + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Message"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); + + // 4. Try to delete by email type. This should fail because email was + // already deleted. 
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + } +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaUnsetVersionIsZero) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. Create a schema with an Email type whose version is left unset + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // 2. The stored type should read back with version 0. + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0)); +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaCompatibleVersionUpdateSucceeds) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. Create a schema with an Email type with version 1 + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(1); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + SetSchemaResultProto set_schema_result = icing.SetSchema(schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 2. Create schema that adds a new optional property and updates version. + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(2); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + // 3. SetSchema should succeed and the version number should be updated. + SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_fully_compatible_changed_schema_types() + ->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); + } +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaIncompatibleVersionUpdateFails) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. Create a schema with an Email type with version 1 + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(1); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED) + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(2); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + + // 3. SetSchema should fail and the version number should NOT be updated. 
+ EXPECT_THAT(icing.SetSchema(schema).status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); + } +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. Create a schema with an Email type with version 1 + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(1); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED) + // with force override to true. + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(2); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + + // 3. SetSchema should succeed and the version number should be updated. 
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); + } +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaNoChangeVersionUpdateSucceeds) { + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 1. Create a schema with an Email type with version 1 + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(1); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); + } + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 2. Create schema that only changes the version. + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_version(2); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + // 3. SetSchema should succeed and the version number should be updated. 
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); + } +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaDuplicateTypesReturnsAlreadyExists) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create a schema with types { "Email", "Message" and "Email" } + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + type = schema.add_types(); + type->set_schema_type("Message"); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + *schema.add_types() = schema.types(0); + + EXPECT_THAT(icing.SetSchema(schema).status(), + ProtoStatusIs(StatusProto::ALREADY_EXISTS)); +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaDuplicatePropertiesReturnsAlreadyExists) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create a schema with an Email type with properties { "title", "body" and + // "title" } + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); 
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema).status(), + ProtoStatusIs(StatusProto::ALREADY_EXISTS)); +} + +// Walks SetSchema() through an invalid schema, a first valid schema, an +// incompatible replacement and a compatible expansion, checking the status and +// the reported latency after each call. +TEST_F(IcingSearchEngineSchemaTest, SetSchema) { + // Pin the timer's elapsed time to 1000 so that every latency_ms() check + // below compares against a fixed value. + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(1000); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + auto message_document = CreateMessageDocument("namespace", "uri"); + + auto schema_with_message = CreateMessageSchema(); + + SchemaProto schema_with_email; + SchemaTypeConfigProto* type = schema_with_email.add_types(); + type->set_schema_type("Email"); + PropertyConfigProto* property = type->add_properties(); + property->set_property_name("title"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + SchemaProto schema_with_email_and_message = schema_with_email; + type = schema_with_email_and_message.add_types(); + type->set_schema_type("Message"); + property = type->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + // Create an arbitrary invalid schema + SchemaProto invalid_schema; + SchemaTypeConfigProto* empty_type = invalid_schema.add_types(); + empty_type->set_schema_type(""); + + // Make sure we can't set invalid schemas + SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema); + EXPECT_THAT(set_schema_result.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + 
EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + + // Can add a document of a schema type that has been set + set_schema_result = icing.SetSchema(schema_with_message); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + + // Schema with Email doesn't have Message, so it would result in incompatible + // data + set_schema_result = icing.SetSchema(schema_with_email); + EXPECT_THAT(set_schema_result.status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + + // Can expand the set of schema types and add a document of a new + // schema type + set_schema_result = icing.SetSchema(schema_with_email_and_message); + EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); + EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); + + EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + // Can't add a document whose schema type isn't set + auto photo_document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Photo") + .AddStringProperty("creator", "icing") + .Build(); + PutResultProto put_result_proto = icing.Put(photo_document); + EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); + EXPECT_THAT(put_result_proto.status().message(), + HasSubstr("'Photo' not found")); +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaProto schema_with_no_indexed_property = CreateMessageSchema(); + schema_with_no_indexed_property.mutable_types(0) + ->mutable_properties(0) + ->clear_string_indexing_config(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(schema_with_no_indexed_property); + // Ignore latency numbers. 
They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Message"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Nothing will be indexed and Search() won't return anything. + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto empty_result; + empty_result.mutable_status()->set_code(StatusProto::OK); + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + SchemaProto schema_with_indexed_property = CreateMessageSchema(); + // Index restoration should be triggered here because new schema requires more + // properties to be indexed. + set_schema_result = icing.SetSchema(schema_with_indexed_property); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Message"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + CreateMessageDocument("namespace", "uri"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaTypeConfigProto person_proto = + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto nested_schema = + SchemaBuilder() + .AddType(person_proto) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_new_schema_types()->Add("Person"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Bill Lundbergh") + .Build()) + .Build(); + + // "sender.name" should get assigned property id 0 and subject should get + // property id 1. + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // document should match a query for 'Bill' in 'sender.name', but not in + // 'subject' + SearchSpecProto search_spec; + search_spec.set_query("sender.name:Bill"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + SearchResultProto empty_result; + empty_result.mutable_status()->set_code(StatusProto::OK); + search_spec.set_query("subject:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + // Now update the schema with index_nested_properties=false. 
This should + // reassign property ids, lead to an index rebuild and ensure that nothing + // matches a query for "Bill". + SchemaProto no_nested_schema = + SchemaBuilder() + .AddType(person_proto) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema(no_nested_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // document shouldn't match a query for 'Bill' in either 'sender.name' or + // 'subject' + search_spec.set_query("sender.name:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + search_spec.set_query("subject:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); +} + +TEST_F(IcingSearchEngineSchemaTest, + ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 'body' should have a property id of 0 and 'subject' should have a property + // id of 1. 
+ SchemaProto email_with_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(email_with_body_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create a document with only a subject property. + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + SearchSpecProto search_spec; + search_spec.set_query("subject:tps"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + // Now update the schema to remove the 'body' field. This is backwards + // incompatible, but document should be preserved because it doesn't contain a + // 'body' field. 
If the index is correctly rebuilt, then 'subject' will now + // have a property id of 0. If not, then the hits in the index will still have + // a property id of 1 and therefore it won't be found. + SchemaProto email_no_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema( + email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + search_spec.set_query("subject:tps"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); +} + +TEST_F( + IcingSearchEngineSchemaTest, + ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 'body' should have a property id of 0 and 'subject' should have a property + // id of 1. 
+ SchemaProto email_with_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(email_with_body_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create a document with only a subject property. + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + SearchSpecProto search_spec; + search_spec.set_query("subject:tps"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + // Now update the schema to remove the 'body' field. This is backwards + // incompatible, but document should be preserved because it doesn't contain a + // 'body' field. 
If the index is correctly rebuilt, then 'subject' and 'to' + // will now have property ids of 0 and 1 respectively. If not, then the hits + // in the index will still have a property id of 1 and therefore it won't + // be found. + SchemaProto email_no_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("to") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema( + email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. 
+ search_spec.set_query("subject:tps"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); +} + +TEST_F(IcingSearchEngineSchemaTest, + ForceSetSchemaIncompatibleNestedDocsAreDeleted) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaTypeConfigProto email_schema_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument("Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto nested_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("company") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_schema_type) + .Build(); + + SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_new_schema_types()->Add("Email"); + expected_set_schema_result.mutable_new_schema_types()->Add("Person"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create two documents - a person document and an email document - both docs + // should be deleted when we remove the 'company' field from the person type. + DocumentProto person_document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Person") + .SetCreationTimestampMs(1000) + .AddStringProperty("name", "Bill Lundbergh") + .AddStringProperty("company", "Initech Corp.") + .Build(); + EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk()); + + DocumentProto email_document = + DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .AddDocumentProperty("sender", person_document) + .Build(); + EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); + + // We should be able to retrieve both documents. + GetResultProto get_result = + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoIsOk()); + EXPECT_THAT(get_result.document(), EqualsProto(person_document)); + + get_result = + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoIsOk()); + EXPECT_THAT(get_result.document(), EqualsProto(email_document)); + + // Now update the schema to remove the 'company' field. This is backwards + // incompatible, *both* documents should be deleted because both fail + // validation (they each contain a 'Person' that has a non-existent property). 
+ nested_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_schema_type) + .Build(); + + set_schema_result = icing.SetSchema( + nested_schema, /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person"); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Email"); + expected_set_schema_result.mutable_index_incompatible_changed_schema_types() + ->Add("Person"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Both documents should be deleted now. + get_result = + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); + + get_result = + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); +} + +// TODO(b/256022027): add unit tests for join incompatible schema change to make +// sure the joinable cache is rebuilt correctly. 
+ +TEST_F(IcingSearchEngineSchemaTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaProto schema_with_optional_subject; + auto type = schema_with_optional_subject.add_types(); + type->set_schema_type("email"); + + // Add a OPTIONAL property + auto property = type->add_properties(); + property->set_property_name("subject"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(), + ProtoIsOk()); + + DocumentProto email_document_without_subject = + DocumentBuilder() + .SetKey("namespace", "without_subject") + .SetSchema("email") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto email_document_with_subject = + DocumentBuilder() + .SetKey("namespace", "with_subject") + .SetSchema("email") + .AddStringProperty("subject", "foo") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk()); + + SchemaProto schema_with_required_subject; + type = schema_with_required_subject.add_types(); + type->set_schema_type("email"); + + // Add a REQUIRED property + property = type->add_properties(); + property->set_property_name("subject"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + + // Can't set the schema since it's incompatible + SetSchemaResultProto set_schema_result = + icing.SetSchema(schema_with_required_subject); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_set_schema_result_proto; + expected_set_schema_result_proto.mutable_status()->set_code( + StatusProto::FAILED_PRECONDITION); + expected_set_schema_result_proto.mutable_status()->set_message( + "Schema is incompatible."); + expected_set_schema_result_proto.add_incompatible_schema_types("email"); + + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); + + // Force set it + set_schema_result = + icing.SetSchema(schema_with_required_subject, + /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + expected_set_schema_result_proto.mutable_status()->clear_message(); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = email_document_with_subject; + + EXPECT_THAT(icing.Get("namespace", "with_subject", + GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // The document without a subject got deleted because it failed validation + // against the new schema + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, without_subject) not found."); + expected_get_result_proto.clear_document(); + + EXPECT_THAT(icing.Get("namespace", "without_subject", + GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaDeletesDocumentsAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaProto schema; + auto type = 
schema.add_types(); + type->set_schema_type("email"); + type = schema.add_types(); + type->set_schema_type("message"); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto email_document = + DocumentBuilder() + .SetKey("namespace", "email_uri") + .SetSchema("email") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto message_document = + DocumentBuilder() + .SetKey("namespace", "message_uri") + .SetSchema("message") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + + // Clear the schema and only add the "email" type, essentially deleting the + // "message" type + SchemaProto new_schema; + type = new_schema.add_types(); + type->set_schema_type("email"); + + // Can't set the schema since it's incompatible + SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema); + // Ignore latency numbers. They're covered elsewhere. + set_schema_result.clear_latency_ms(); + SetSchemaResultProto expected_result; + expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); + expected_result.mutable_status()->set_message("Schema is incompatible."); + expected_result.add_deleted_schema_types("message"); + + EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); + + // Force set it + set_schema_result = + icing.SetSchema(new_schema, + /*ignore_errors_and_delete_documents=*/true); + // Ignore latency numbers. They're covered elsewhere. 
+ set_schema_result.clear_latency_ms(); + expected_result.mutable_status()->set_code(StatusProto::OK); + expected_result.mutable_status()->clear_message(); + EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); + + // "email" document is still there + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = email_document; + + EXPECT_THAT(icing.Get("namespace", "email_uri", + GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // "message" document got deleted + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, message_uri) not found."); + expected_get_result_proto.clear_document(); + + EXPECT_THAT(icing.Get("namespace", "message_uri", + GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineSchemaTest, GetSchemaNotFound) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + EXPECT_THAT(icing.GetSchema().status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); +} + +TEST_F(IcingSearchEngineSchemaTest, GetSchemaOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); + EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); +} + +TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeFailedPrecondition) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + 
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + GetSchemaTypeResultProto get_schema_type_result_proto = + icing.GetSchemaType("nonexistent_schema"); + EXPECT_THAT(get_schema_type_result_proto.status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); + EXPECT_THAT(get_schema_type_result_proto.status().message(), + HasSubstr("Schema not set")); +} + +TEST_F(IcingSearchEngineSchemaTest, GetSchemaTypeOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + GetSchemaTypeResultProto expected_get_schema_type_result_proto; + expected_get_schema_type_result_proto.mutable_status()->set_code( + StatusProto::OK); + *expected_get_schema_type_result_proto.mutable_schema_type_config() = + CreateMessageSchema().types(0); + EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()), + EqualsProto(expected_get_schema_type_result_proto)); +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) { + SchemaProto schema; + auto type = schema.add_types(); + type->set_schema_type("Message"); + + auto body = type->add_properties(); + body->set_property_name("body"); + body->set_data_type(PropertyConfigProto::DataType::STRING); + body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + // Make an incompatible schema, a previously OPTIONAL field is REQUIRED + SchemaProto incompatible_schema = schema; + incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality( + PropertyConfigProto::Cardinality::REQUIRED); + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + } // This should shut down IcingSearchEngine and persist anything it needs to + + 
ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str())); + + // Since we don't have any documents yet, we can't detect this edge-case. But + // it should be fine since there aren't any documents to be invalidated. + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineSchemaTest, SetSchemaCanDetectPreviousSchemaWasLost) { + SchemaProto schema; + auto type = schema.add_types(); + type->set_schema_type("Message"); + + auto body = type->add_properties(); + body->set_property_name("body"); + body->set_data_type(PropertyConfigProto::DataType::STRING); + body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + body->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + body->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + // Make an incompatible schema, a previously OPTIONAL field is REQUIRED + SchemaProto incompatible_schema = schema; + incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality( + PropertyConfigProto::Cardinality::REQUIRED); + + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Can retrieve by namespace/uri + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; + + ASSERT_THAT( + icing.Get("namespace", "uri", 
GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Can search for it + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + CreateMessageDocument("namespace", "uri"); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + } // This should shut down IcingSearchEngine and persist anything it needs to + + ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str())); + + // Setting the new, different schema will remove incompatible documents + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk()); + + // Can't retrieve by namespace/uri + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (namespace, uri) not found."); + + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Can't search for it + SearchResultProto empty_result; + empty_result.mutable_status()->set_code(StatusProto::OK); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); +} + +TEST_F(IcingSearchEngineSchemaTest, IcingShouldWorkFor64Sections) { + // Create a schema with 64 sections + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + // Person has 4 
sections. + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("phoneNumber") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + // Email has 16 sections. + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("date") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("time") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("receiver") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("cc") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() + // EmailCollection has 64 
sections. + .SetType("EmailCollection") + .AddProperty( + PropertyConfigBuilder() + .SetName("email1") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("email2") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("email3") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("email4") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person1 = + DocumentBuilder() + .SetKey("namespace", "person1") + .SetSchema("Person") + .AddStringProperty("firstName", "first1") + .AddStringProperty("lastName", "last1") + .AddStringProperty("emailAddress", "email1@gmail.com") + .AddStringProperty("phoneNumber", "000-000-001") + .Build(); + DocumentProto person2 = + DocumentBuilder() + .SetKey("namespace", "person2") + .SetSchema("Person") + .AddStringProperty("firstName", "first2") + .AddStringProperty("lastName", "last2") + .AddStringProperty("emailAddress", "email2@gmail.com") + .AddStringProperty("phoneNumber", "000-000-002") + .Build(); + DocumentProto person3 = + DocumentBuilder() + .SetKey("namespace", "person3") + .SetSchema("Person") + .AddStringProperty("firstName", "first3") + .AddStringProperty("lastName", "last3") + .AddStringProperty("emailAddress", "email3@gmail.com") + .AddStringProperty("phoneNumber", "000-000-003") + .Build(); + DocumentProto email1 = DocumentBuilder() + .SetKey("namespace", "email1") + .SetSchema("Email") + .AddStringProperty("body", "test body") + .AddStringProperty("subject", "test subject") + .AddStringProperty("date", "2022-08-01") + .AddStringProperty("time", "1:00 PM") + .AddDocumentProperty("sender", person1) + 
.AddDocumentProperty("receiver", person2) + .AddDocumentProperty("cc", person3) + .Build(); + DocumentProto email2 = DocumentBuilder() + .SetKey("namespace", "email2") + .SetSchema("Email") + .AddStringProperty("body", "test body") + .AddStringProperty("subject", "test subject") + .AddStringProperty("date", "2022-08-02") + .AddStringProperty("time", "2:00 PM") + .AddDocumentProperty("sender", person2) + .AddDocumentProperty("receiver", person1) + .AddDocumentProperty("cc", person3) + .Build(); + DocumentProto email3 = DocumentBuilder() + .SetKey("namespace", "email3") + .SetSchema("Email") + .AddStringProperty("body", "test body") + .AddStringProperty("subject", "test subject") + .AddStringProperty("date", "2022-08-03") + .AddStringProperty("time", "3:00 PM") + .AddDocumentProperty("sender", person3) + .AddDocumentProperty("receiver", person1) + .AddDocumentProperty("cc", person2) + .Build(); + DocumentProto email4 = DocumentBuilder() + .SetKey("namespace", "email4") + .SetSchema("Email") + .AddStringProperty("body", "test body") + .AddStringProperty("subject", "test subject") + .AddStringProperty("date", "2022-08-04") + .AddStringProperty("time", "4:00 PM") + .AddDocumentProperty("sender", person3) + .AddDocumentProperty("receiver", person2) + .AddDocumentProperty("cc", person1) + .Build(); + DocumentProto email_collection = + DocumentBuilder() + .SetKey("namespace", "email_collection") + .SetSchema("EmailCollection") + .AddDocumentProperty("email1", email1) + .AddDocumentProperty("email2", email2) + .AddDocumentProperty("email3", email3) + .AddDocumentProperty("email4", email4) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk()); + + const std::vector query_terms = { + "first1", "last2", 
"email3@gmail.com", "000-000-001", + "body", "subject", "2022-08-02", "3\\:00"}; + SearchResultProto expected_document; + expected_document.mutable_status()->set_code(StatusProto::OK); + *expected_document.mutable_results()->Add()->mutable_document() = + email_collection; + for (const std::string& query_term : query_terms) { + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query(query_term); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_document)); + } + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("foo"); + SearchResultProto expected_no_documents; + expected_no_documents.mutable_status()->set_code(StatusProto::OK); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(expected_no_documents)); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc new file mode 100644 index 0000000..9ebd060 --- /dev/null +++ b/icing/icing-search-engine_search_test.cc @@ -0,0 +1,4143 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/icing-search-engine.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/join/join-processor.h" +#include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/query/query-features.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/store/document-log-creator.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/random-string.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" +#include "icing/util/snippet-helpers.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::_; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Gt; 
+using ::testing::HasSubstr; +using ::testing::IsEmpty; +using ::testing::Le; +using ::testing::Lt; +using ::testing::Matcher; +using ::testing::Ne; +using ::testing::Return; +using ::testing::SizeIs; +using ::testing::StrEq; +using ::testing::UnorderedElementsAre; + +// For mocking purpose, we allow tests to provide a custom Filesystem. +class TestIcingSearchEngine : public IcingSearchEngine { + public: + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr filesystem, + std::unique_ptr icing_filesystem, + std::unique_ptr clock, + std::unique_ptr jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This test is meant to cover all tests relating to IcingSearchEngine::Search +// and IcingSearchEngine::GetNextPage. +class IcingSearchEngineSearchTest + : public ::testing::TestWithParam { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD. 
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +// Non-zero value so we don't override it to be the current time +constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { + return DocumentBuilder() + .SetKey(std::move(name_space), std::move(uri)) + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); +} + +DocumentProto CreateEmailDocument(const std::string& name_space, + const std::string& uri, int score, + const std::string& subject_content, + const std::string& body_content) { + return DocumentBuilder() + .SetKey(name_space, uri) + .SetSchema("Email") + .SetScore(score) + .AddStringProperty("subject", subject_content) + .AddStringProperty("body", body_content) + .Build(); +} + +SchemaProto CreateMessageSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +SchemaProto CreateEmailSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + 
.AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); +} + +SchemaProto CreatePersonAndEmailSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); +} + +ScoringSpecProto GetDefaultScoringSpec() { + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + return scoring_spec; +} + +UsageReport CreateUsageReport(std::string name_space, std::string uri, + int64 timestamp_ms, + UsageReport::UsageType usage_type) { + UsageReport usage_report; + usage_report.set_document_namespace(name_space); + usage_report.set_document_uri(uri); + usage_report.set_usage_timestamp_ms(timestamp_ms); + usage_report.set_usage_type(usage_type); + return usage_report; +} + +std::vector GetUrisFromSearchResults( + SearchResultProto& search_result_proto) { + std::vector result_uris; + result_uris.reserve(search_result_proto.results_size()); + for (int i = 0; i < search_result_proto.results_size(); i++) { + 
result_uris.push_back( + search_result_proto.mutable_results(i)->document().uri()); + } + return result_uris; +} + +TEST_P(IcingSearchEngineSearchTest, SearchReturnsValidResults) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); + result_spec.mutable_snippet_spec()->set_num_to_snippet(1); + + SearchResultProto results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(2)); + + const DocumentProto& document = results.results(0).document(); + EXPECT_THAT(document, EqualsProto(document_two)); + + const SnippetProto& snippet = results.results(0).snippet(); + EXPECT_THAT(snippet.entries(), SizeIs(1)); + EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); + std::string_view content = + GetString(&document, snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message")); + + EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); + EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty()); + + search_spec.set_query("foo"); + + 
SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresDocumentScore) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); + document_one.set_score(93); + document_one.set_creation_timestamp_ms(10000); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); + document_two.set_score(15); + document_two.set_creation_timestamp_ms(12000); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + // Rank by DOCUMENT_SCORE and ensure that the score field is populated with + // document score. 
+ ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + SearchResultProto results = icing.Search(search_spec, scoring_spec, + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(2)); + + EXPECT_THAT(results.results(0).document(), EqualsProto(document_one)); + EXPECT_THAT(results.results(0).score(), 93); + EXPECT_THAT(results.results(1).document(), EqualsProto(document_two)); + EXPECT_THAT(results.results(1).score(), 15); +} + +TEST_P(IcingSearchEngineSearchTest, SearchReturnsScoresCreationTimestamp) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); + document_one.set_score(93); + document_one.set_creation_timestamp_ms(10000); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); + document_two.set_score(15); + document_two.set_creation_timestamp_ms(12000); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + // Rank by CREATION_TS and ensure that the score field is populated with + // creation ts. 
+ ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + + SearchResultProto results = icing.Search(search_spec, scoring_spec, + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(2)); + + EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); + EXPECT_THAT(results.results(0).score(), 12000); + EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); + EXPECT_THAT(results.results(1).score(), 10000); +} + +TEST_P(IcingSearchEngineSearchTest, SearchReturnsOneResult) { + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(1000); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document_two; + + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + + EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000)); + 
EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(), + Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(), + Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(), + Eq(1000)); + + // The token is a random number so we don't verify it. + expected_search_result_proto.set_next_page_token( + search_result_proto.next_page_token()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchZeroResultLimitReturnsEmptyResults) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query(""); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(0); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchNegativeResultLimitReturnsInvalidArgument) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query(""); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(-5); + + SearchResultProto expected_search_result_proto; + 
expected_search_result_proto.mutable_status()->set_code( + StatusProto::INVALID_ARGUMENT); + expected_search_result_proto.mutable_status()->set_message( + "ResultSpecProto.num_per_page cannot be negative."); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query(""); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_total_bytes_per_page_threshold(-1); + + SearchResultProto actual_results1 = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results1.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + + result_spec.set_num_total_bytes_per_page_threshold(0); + SearchResultProto actual_results2 = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results2.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithPersistenceReturnsValidResults) { + IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + + { + // Set the schema up beforehand. + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + // Schema will be persisted to disk when icing goes out of scope. + } + + { + // Ensure that icing initializes the schema and section_manager + // properly from the pre-existing file. 
+ IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); + // The index and document store will be persisted to disk when icing goes + // out of scope. + } + + { + // Ensure that the index is brought back up without problems and we + // can query for the content that we expect. + IcingSearchEngine icing(icing_options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + CreateMessageDocument("namespace", "uri"); + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + search_spec.set_query("foo"); + + SearchResultProto empty_result; + empty_result.mutable_status()->set_code(StatusProto::OK); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + } +} + +TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnEmpty) { + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(1000); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + SearchSpecProto 
search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + // Empty result, no next-page token + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + + EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(), + Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000)); + EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0)); + EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(), + Eq(0)); + EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(), + Eq(1000)); + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchShouldReturnMultiplePages) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates and inserts 5 documents + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); + DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + 
ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(2); + + // Searches and gets the first page, 2 results + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document5; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document4; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); + uint64_t next_page_token = search_result_proto.next_page_token(); + // Since the token is a random number, we don't need to verify + expected_search_result_proto.set_next_page_token(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Second page, 2 results + expected_search_result_proto.clear_results(); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Third page, 1 result + expected_search_result_proto.clear_results(); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + // Because there are no more results, we should not return the next page + // token. 
+ expected_search_result_proto.clear_next_page_token(); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // No more results + expected_search_result_proto.clear_results(); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchWithNoScoringShouldReturnMultiplePages) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates and inserts 5 documents + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); + DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(2); + + // Searches and gets the first page, 2 results + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + 
*expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document5; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document4; + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); + uint64_t next_page_token = search_result_proto.next_page_token(); + // Since the token is a random number, we don't need to verify + expected_search_result_proto.set_next_page_token(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Second page, 2 results + expected_search_result_proto.clear_results(); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Third page, 1 result + expected_search_result_proto.clear_results(); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + // Because there are no more results, we should not return the next page + // token. 
+ expected_search_result_proto.clear_next_page_token(); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // No more results + expected_search_result_proto.clear_results(); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchWithUnknownEnabledFeatureShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + search_spec.add_enabled_features("BAD_FEATURE"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_P(IcingSearchEngineSearchTest, ShouldReturnMultiplePagesWithSnippets) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates and inserts 5 documents + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); + DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + 
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(2); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); + result_spec.mutable_snippet_spec()->set_num_to_snippet(3); + + // Searches and gets the first page, 2 results with 2 snippets + SearchResultProto search_result = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(2)); + ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); + + const DocumentProto& document_result_1 = search_result.results(0).document(); + EXPECT_THAT(document_result_1, EqualsProto(document5)); + const SnippetProto& snippet_result_1 = search_result.results(0).snippet(); + EXPECT_THAT(snippet_result_1.entries(), SizeIs(1)); + EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &document_result_1, snippet_result_1.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)), + ElementsAre("message")); + + const DocumentProto& document_result_2 = search_result.results(1).document(); + EXPECT_THAT(document_result_2, EqualsProto(document4)); + const SnippetProto& snippet_result_2 = search_result.results(1).snippet(); + EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body")); + content = GetString(&document_result_2, + 
snippet_result_2.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)), + ElementsAre("message")); + + // Second page, 2 result with 1 snippet + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(2)); + ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); + + const DocumentProto& document_result_3 = search_result.results(0).document(); + EXPECT_THAT(document_result_3, EqualsProto(document3)); + const SnippetProto& snippet_result_3 = search_result.results(0).snippet(); + EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body")); + content = GetString(&document_result_3, + snippet_result_3.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)), + ElementsAre("message body")); + EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)), + ElementsAre("message")); + + EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2)); + EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty()); + + // Third page, 1 result with 0 snippets + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(1)); + ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); + + EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1)); + EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty()); +} + +TEST_P(IcingSearchEngineSearchTest, ShouldInvalidateNextPageToken) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document1 = 
CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + // Searches and gets the first page, 1 result + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); + uint64_t next_page_token = search_result_proto.next_page_token(); + // Since the token is a random number, we don't need to verify + expected_search_result_proto.set_next_page_token(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + // Now document1 is still to be fetched. 
+ + // Invalidates token + icing.InvalidateNextPageToken(next_page_token); + + // Tries to fetch the second page, no result since it's invalidated + expected_search_result_proto.clear_results(); + expected_search_result_proto.clear_next_page_token(); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchIncludesDocumentsBeforeTtl) { + SchemaProto schema; + auto type = schema.add_types(); + type->set_schema_type("Message"); + + auto body = type->add_properties(); + body->set_property_name("body"); + body->set_data_type(PropertyConfigProto::DataType::STRING); + body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + body->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + body->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(500) + .Build(); + + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_search_type(GetParam()); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; + + // Time just has to be less than the document's creation timestamp (100) + the + // document's ttl (500) + auto fake_clock = std::make_unique(); + fake_clock->SetSystemTimeMilliseconds(400); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + 
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Check that the document is returned as part of search results + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchDoesntIncludeDocumentsPastTtl) { + SchemaProto schema; + auto type = schema.add_types(); + type->set_schema_type("Message"); + + auto body = type->add_properties(); + body->set_property_name("body"); + body->set_data_type(PropertyConfigProto::DataType::STRING); + body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + body->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + body->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(500) + .Build(); + + SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_search_type(GetParam()); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + // Time just has to be greater than the document's creation timestamp (100) + + // the document's ttl (500) + auto fake_clock = std::make_unique(); + fake_clock->SetSystemTimeMilliseconds(700); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + 
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Check that the document is not returned as part of search results + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchWorksAfterSchemaTypesCompatiblyModified) { + SchemaProto schema; + auto type_config = schema.add_types(); + type_config->set_schema_type("message"); + + auto property = type_config->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + + DocumentProto message_document = + DocumentBuilder() + .SetKey("namespace", "message_uri") + .SetSchema("message") + .AddStringProperty("body", "foo") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk()); + + // Make sure we can search for message document + SearchSpecProto search_spec; + search_spec.set_query("foo"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_search_type(GetParam()); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + // The message isn't indexed, so we get nothing + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // With just the schema type filter, we can search for the message + 
search_spec.Clear(); + search_spec.add_schema_type_filters("message"); + + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + message_document; + + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Since SchemaTypeIds are assigned based on order in the SchemaProto, this + // will force a change in the DocumentStore's cached SchemaTypeIds + schema.clear_types(); + type_config = schema.add_types(); + type_config->set_schema_type("email"); + + // Adding a new indexed property will require reindexing + type_config = schema.add_types(); + type_config->set_schema_type("message"); + + property = type_config->add_properties(); + property->set_property_name("body"); + property->set_data_type(PropertyConfigProto::DataType::STRING); + property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + property->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::PREFIX); + property->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + search_spec.Clear(); + search_spec.set_query("foo"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.add_schema_type_filters("message"); + + // We can still search for the message document + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByDocumentScore) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + 
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + // Intentionally inserts the documents in the order that is different than + // their score order + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // Result should be in descending score order + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + SearchResultProto search_result_proto = 
icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchShouldAllowNoScoring) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 documents and ensures the relationship of them is: + // document1 < document2 < document3 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(1571111111111) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(1572222222222) + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(1573333333333) + .Build(); + + // Intentionally inserts the documents in the order that is different than + // their score order + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; 
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + + // Results should not be ranked by score but returned in reverse insertion + // order. + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultShouldBeRankedByCreationTimestamp) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 documents and ensures the relationship in terms of creation + // timestamp score is: document1 < document2 < document3 + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetCreationTimestampMs(1571111111111) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetCreationTimestampMs(1572222222222) + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetCreationTimestampMs(1573333333333) + .Build(); + + // Intentionally inserts the documents in the order that is different than + // their score order + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + 
search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // Result should be in descending timestamp order + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByUsageCount) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 test documents + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + // Intentionally inserts the documents in a different order to eliminate the + // possibility that the following results are sorted 
in the default reverse + // insertion order. + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 > + // doc1 when ranked by USAGE_TYPE1_COUNT. + UsageReport usage_report_doc3 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + UsageReport usage_report_doc2 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk()); + ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk()); + ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // Result should be in descending USAGE_TYPE1_COUNT order + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultShouldHaveDefaultOrderWithoutUsageCounts) { + 
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 test documents + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // None of the documents have usage reports. Result should be in the default + // reverse insertion order. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultShouldBeRankedByUsageTimestamp) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 test documents + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + // Intentionally inserts the documents in a different order to eliminate the + // possibility that the following results are sorted in the default reverse + // insertion order. 
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // Report usage for doc2 and doc3. The order will be doc3 > doc2 > doc1 when + // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP. + UsageReport usage_report_doc2 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE1); + UsageReport usage_report_doc3 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000, + UsageReport::USAGE_TYPE1); + ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk()); + ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), 
ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + // Create and index eight documents (uri0 through uri7) in namespace + // "namespace1". + DocumentProto document = CreateEmailDocument( + "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, + "starbucks coffee", + "habit. birthday rewards. 
good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("coffee OR food"); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + + // Result should be in descending score order + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + // Both doc5 and doc7 have "coffee" in name and text sections. + // However, doc5 has more matches in the text section. + // Documents with "food" are ranked lower as the term "food" is commonly + // present in this corpus, and thus, has a lower IDF. + EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), + ElementsAre("namespace1/uri5", // 'coffee' 3 times + "namespace1/uri7", // 'coffee' 2 times + "namespace1/uri1", // 'food' 2 times + "namespace1/uri4", // 'food' 2 times + "namespace1/uri2", // 'food' 1 time + "namespace1/uri6")); // 'food' 1 time +} + +// Same corpus and query as Bm25fRelevanceScoringOneNamespace, but ranked by +// the advanced scoring expression "this.relevanceScore() * 2 + 1"; the +// expected uri order is the same. +TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringOneNamespaceAdvanced) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace1". + DocumentProto document = CreateEmailDocument( + "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. 
kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + /* uri2..uri7 repeat the Bm25fRelevanceScoringOneNamespace fixture text. */ + document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, + "starbucks coffee", + "habit. birthday rewards. 
good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("coffee OR food"); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1"); + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + + // Result should be in descending score order + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + // Both doc5 and doc7 have "coffee" in name and text sections. + // However, doc5 has more matches in the text section. + // Documents with "food" are ranked lower as the term "food" is commonly + // present in this corpus, and thus, has a lower IDF. + EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), + ElementsAre("namespace1/uri5", // 'coffee' 3 times + "namespace1/uri7", // 'coffee' 2 times + "namespace1/uri1", // 'food' 2 times + "namespace1/uri4", // 'food' 2 times + "namespace1/uri2", // 'food' 1 time + "namespace1/uri6")); // 'food' 1 time +} + +// Checks RELEVANCE_SCORE ranking for the query "coffee -starbucks": only +// documents that mention "coffee" but not "starbucks" are expected. +TEST_P(IcingSearchEngineSearchTest, + Bm25fRelevanceScoringOneNamespaceNotOperator) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace1". + DocumentProto document = CreateEmailDocument( + "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. 
kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + /* uri3's body ends with "nice coffee" here, so it matches "coffee". */ + document = CreateEmailDocument( + "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza", + "thin-crust pizza. good and fast. nice coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, + "starbucks coffee", + "habit. birthday rewards. 
good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("coffee -starbucks"); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + + // Result should be in descending score order + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + EXPECT_THAT( + GetUrisFromSearchResults(search_result_proto), + ElementsAre("namespace1/uri5", // 'coffee' 3 times, 'starbucks' 0 times + "namespace1/uri3")); // 'coffee' 1 time, 'starbucks' 0 times +} + +// Checks RELEVANCE_SCORE ranking when each query term is restricted to a +// section ("subject:coffee OR body:food"). +TEST_P(IcingSearchEngineSearchTest, + Bm25fRelevanceScoringOneNamespaceSectionRestrict) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace1". + DocumentProto document = CreateEmailDocument( + "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. 
good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = + CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18, + "peets coffee, best coffee", + "espresso. decaf. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri7", /*score=*/4, "starbucks", + "habit. birthday rewards. good coffee. brewed coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("subject:coffee OR body:food"); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + + // Result should be in descending score order + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + // The term frequencies of "coffee" and "food" are calculated respectively + // from the subject section and the body section. + // Documents with "food" are ranked lower as the term "food" is commonly + // present in this corpus, and thus, has a lower IDF. + // Note: uri7 has "coffee" only in its final text argument and no "food" at + // all, consistent with its absence from the expected results. 
+ EXPECT_THAT( + GetUrisFromSearchResults(search_result_proto), + ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject + "namespace1/uri1", // 'food' 2 times in section body + "namespace1/uri4", // 'food' 2 times in section body + "namespace1/uri2", // 'food' 1 time in section body + "namespace1/uri6")); // 'food' 1 time in section body +} + +// Indexes the same eight documents in "namespace1" and "namespace2" (only +// namespace2/uri7 has a shorter body) and checks the combined RELEVANCE_SCORE +// ranking for "coffee OR food". +TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringTwoNamespaces) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace1". + DocumentProto document = CreateEmailDocument( + "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. 
excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, + "starbucks coffee", + "habit. birthday rewards. good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace2". They mirror the + // "namespace1" documents except for uri7, whose body is shortened. + document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10, + "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace2", "namespace2/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. 
food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4, + "starbucks coffee", "good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("coffee OR food"); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + ResultSpecProto result_spec_proto; + result_spec_proto.set_num_per_page(16); + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec_proto); + + // Result should be in descending score order + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + // The two corpora have the same documents except for document 7, which in + // "namespace2" is much shorter than the average document length, so it is + // boosted. + EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), + ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc + "namespace1/uri5", // 'coffee' 3 times + "namespace2/uri5", // 'coffee' 3 times + "namespace1/uri7", // 'coffee' 2 times + "namespace1/uri1", // 'food' 2 times + "namespace2/uri1", // 'food' 2 times + "namespace1/uri4", // 'food' 2 times + "namespace2/uri4", // 'food' 2 times + "namespace1/uri2", // 'food' 1 time + "namespace2/uri2", // 'food' 1 time + "namespace1/uri6", // 'food' 1 time + "namespace2/uri6")); // 'food' 1 time +} + +// Same two-namespace corpus as Bm25fRelevanceScoringTwoNamespaces, but the +// search is restricted to "namespace2" via a namespace filter. +TEST_P(IcingSearchEngineSearchTest, Bm25fRelevanceScoringWithNamespaceFilter) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace1". 
+ DocumentProto document = CreateEmailDocument( + "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", + "fresh fish. inexpensive. good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace1", "namespace1/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, + "starbucks coffee", + "habit. birthday rewards. good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Create and index documents in namespace "namespace2" (uri0 through uri7). + document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10, + "sushi belmont", + "fresh fish. inexpensive. 
good sushi."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + /* uri1..uri6 below reuse the namespace1 subjects and bodies unchanged. */ + document = CreateEmailDocument( + "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander", + "indian food. buffet. spicy food. kadai chicken."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4, + "panda express", + "chinese food. cheap. inexpensive. kung pao."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23, + "speederia pizza", + "thin-crust pizza. good and fast."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8, + "whole foods", + "salads. pizza. organic food. expensive."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee", + "espresso. decaf. brewed coffee. whole beans. excellent coffee."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument( + "namespace2", "namespace2/uri6", /*score=*/4, "costco", + "bulk. cheap whole beans. frozen fish. 
food samples."); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4, + "starbucks coffee", "good coffee"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("coffee OR food"); + search_spec.set_search_type(GetParam()); + // Now query only corpus 2 ("namespace2") + search_spec.add_namespace_filters("namespace2"); + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + + // Result from namespace "namespace2" should be in descending score order + EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); + // Both doc5 and doc7 have "coffee" in name and text sections. + // Even though doc5 has more matches in the text section, doc7's length is + // much shorter than the average corpus's length, so it's being boosted. + // Documents with "food" are ranked lower as the term "food" is commonly + // present in this corpus, and thus, has a lower IDF. 
+ EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), + ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc + "namespace2/uri5", // 'coffee' 3 times + "namespace2/uri1", // 'food' 2 times + "namespace2/uri4", // 'food' 2 times + "namespace2/uri2", // 'food' 1 time + "namespace2/uri6")); // 'food' 1 time +} + +// With no usage reports, ranking by USAGE_TYPE1_LAST_USED_TIMESTAMP is +// expected to yield the default reverse insertion order. +TEST_P(IcingSearchEngineSearchTest, + SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 test documents + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // None of the documents have usage reports. Result should be in the default + // reverse insertion order. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedAscendingly) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 3 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + // Intentionally inserts the documents in an order that differs from their + // score order + 
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // "m" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + // Result should be in ascending score order + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + scoring_spec.set_order_by(ScoringSpecProto::Order::ASC); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +// Result groupings that name the same namespace more than once should make +// Search return INVALID_ARGUMENT. +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingDuplicateNamespaceShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri/2") + 
.SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match both documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + // Specify "namespace1" twice. This should result in an error. + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::NAMESPACE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace1"); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace2"); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace1"); + result_grouping = result_spec.add_result_groupings(); + entry = result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace1"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +// Result groupings that name the same schema type more than once should make +// Search return INVALID_ARGUMENT. +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingDuplicateSchemaShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < 
document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match both documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + // Specify "Message" twice. This should result in an error. + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_schema("Message"); + entry = result_grouping->add_entry_groupings(); + entry->set_schema("nonexistentMessage"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + entry = result_grouping->add_entry_groupings(); + entry->set_schema("Message"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +// Result groupings that name the same (namespace, schema) pair more than once +// should make Search return INVALID_ARGUMENT. +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingDuplicateNamespaceAndSchemaSchemaShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + 
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match both documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + // Specify the ("namespace1", "Message") pair more than once. This should + // result in an error. 
+ ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace1"); + entry->set_schema("Message"); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace2"); + entry->set_schema("Message"); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace1"); + entry->set_schema("Message"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace1"); + entry->set_schema("Message"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingNonPositiveMaxResultsShouldReturnError) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); 
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 2 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + // Specify zero results. This should result in an error. + ResultSpecProto result_spec; + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(0); + entry->set_namespace_("namespace1"); + entry->set_schema("Message"); + result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace2"); + entry->set_schema("Message"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + + // Specify negative results. This should result in an error. 
+ result_spec.mutable_result_groupings(0)->set_max_results(-1); + EXPECT_THAT(search_result_proto.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingMultiNamespaceGrouping) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 6 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 < document5 < + // document6 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document4 = + DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(4) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document5 = + DocumentBuilder() + .SetKey("namespace3", "uri/5") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(5) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document6 = + DocumentBuilder() + .SetKey("namespace3", "uri/6") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(6) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + 
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); + + // "m" will match all 6 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::NAMESPACE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace1"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(2); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace2"); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace3"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // The last result (document1) in namespace "namespace1" should not be + // included. "namespace2" and "namespace3" are grouped together. So only the + // two highest scored documents between the two (both of which are in + // "namespace3") should be returned. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document6; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document5; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchResultGroupingMultiSchemaGrouping) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetScore(1) + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .AddDocumentProperty("sender", DocumentBuilder() + .SetKey("namespace", "uri1-sender") + .SetSchema("Person") + .AddStringProperty("name", "foo") + 
.Build()) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Message") + .SetScore(2) + .SetCreationTimestampMs(10) + .AddStringProperty("body", "fo") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri3") + .SetSchema("Message") + .SetScore(3) + .SetCreationTimestampMs(10) + .AddStringProperty("body", "fo") + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + // "f" will match all 3 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("f"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_schema("Message"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("Email"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // Each of the highest scored documents of schema type "Message" (document3) + // and "Email" (document1) should be returned. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document3; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingMultiNamespaceAndSchemaGrouping) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 6 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 < document5 < + // document6 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document4 = + DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(4) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document5 = + DocumentBuilder() + .SetKey("namespace3", "uri/5") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(5) + 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document6 = + DocumentBuilder() + .SetKey("namespace3", "uri/6") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(6) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); + + // "m" will match all 6 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace1"); + entry->set_schema("Message"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace2"); + entry->set_schema("Message"); + result_grouping = result_spec.add_result_groupings(); + result_grouping->set_max_results(1); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("namespace3"); + entry->set_schema("Message"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // The three highest scored documents that fit the criteria of + // 
"namespace1xMessage" (document2), "namespace2xMessage" (document4), + // and "namespace3xMessage" (document6) should be returned. + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document6; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document4; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingNonexistentNamespaceShouldBeIgnored) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 2 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + 
ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::NAMESPACE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace1"); + entry = result_grouping->add_entry_groupings(); + entry->set_namespace_("nonexistentNamespace"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // Only the top ranked document in "namespace1" (document2), should be + // returned. The presence of "nonexistentNamespace" in the same result + // grouping should have no effect. + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingNonexistentSchemaShouldBeIgnored) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 2 documents and ensures the relationship in terms of document + // score is: document1 < document2 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), 
ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // "m" will match all 2 documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_schema("Message"); + entry = result_grouping->add_entry_groupings(); + entry->set_schema("nonexistentMessage"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // Only the top ranked document in "Message" (document2), should be + // returned. The presence of "nonexistentMessage" in the same result + // grouping should have no effect. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates 4 documents and ensures the relationship in terms of document + // score is: document1 < document2 < document3 < document4 + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetScore(1) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace1", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetScore(2) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace2", "uri/3") + .SetSchema("Message") + .AddStringProperty("body", "message3") + .SetScore(3) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + DocumentProto document4 = + DocumentBuilder() + .SetKey("namespace2", "uri/4") + .SetSchema("Message") + .AddStringProperty("body", "message4") + .SetScore(4) + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + + // "m" will match all 4 documents + SearchSpecProto search_spec; + 
search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); + search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + + ResultSpecProto result_spec; + result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); + ResultSpecProto::ResultGrouping* result_grouping = + result_spec.add_result_groupings(); + ResultSpecProto::ResultGrouping::Entry* entry = + result_grouping->add_entry_groupings(); + result_grouping->set_max_results(1); + entry->set_namespace_("namespace2"); + entry->set_schema("Message"); + entry = result_grouping->add_entry_groupings(); + entry->set_schema("namespace1"); + entry->set_schema("nonexistentMessage"); + + SearchResultProto search_result_proto = + icing.Search(search_spec, scoring_spec, result_spec); + + // Only the top ranked document in "namespace2xMessage" (document4), should be + // returned. The presence of "namespace1xnonexistentMessage" in the same + // result grouping should have no effect. If either the namespace or the + // schema type is nonexistent, the entire entry will be ignored. 
+ SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document4; + + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); +} + +TEST_P(IcingSearchEngineSearchTest, SnippetNormalization) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "MDI zurich Team Meeting") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "mdi Zürich Team Meeting") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("mdi Zürich"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); + result_spec.mutable_snippet_spec()->set_num_to_snippet(2); + + SearchResultProto results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + ASSERT_THAT(results.results(), SizeIs(2)); + const DocumentProto& result_document_1 = results.results(0).document(); + const SnippetProto& result_snippet_1 = results.results(0).snippet(); + EXPECT_THAT(result_document_1, EqualsProto(document_two)); + 
EXPECT_THAT(result_snippet_1.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &result_document_1, result_snippet_1.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_1.entries(0)), + ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)), + ElementsAre("mdi", "Zürich")); + + const DocumentProto& result_document_2 = results.results(1).document(); + const SnippetProto& result_snippet_2 = results.results(1).snippet(); + EXPECT_THAT(result_document_2, EqualsProto(document_one)); + EXPECT_THAT(result_snippet_2.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_2, + result_snippet_2.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_2.entries(0)), + ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)), + ElementsAre("MDI", "zurich")); +} + +TEST_P(IcingSearchEngineSearchTest, SnippetNormalizationPrefix) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "MDI zurich Team Meeting") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "mdi Zürich Team Meeting") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + 
SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("md Zür"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); + result_spec.mutable_snippet_spec()->set_num_to_snippet(2); + + SearchResultProto results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + ASSERT_THAT(results.results(), SizeIs(2)); + const DocumentProto& result_document_1 = results.results(0).document(); + const SnippetProto& result_snippet_1 = results.results(0).snippet(); + EXPECT_THAT(result_document_1, EqualsProto(document_two)); + EXPECT_THAT(result_snippet_1.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body")); + std::string_view content = GetString( + &result_document_1, result_snippet_1.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_1.entries(0)), + ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)), + ElementsAre("mdi", "Zürich")); + + const DocumentProto& result_document_2 = results.results(1).document(); + const SnippetProto& result_snippet_2 = results.results(1).snippet(); + EXPECT_THAT(result_document_2, EqualsProto(document_one)); + EXPECT_THAT(result_snippet_2.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_2, + result_snippet_2.entries(0).property_name()); + EXPECT_THAT( + GetWindows(content, result_snippet_2.entries(0)), + ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)), + ElementsAre("MDI", "zurich")); +} + +TEST_P(IcingSearchEngineSearchTest, SnippetSectionRestrict) { + 
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); + + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Email") + .AddStringProperty("subject", "MDI zurich Team Meeting") + .AddStringProperty("body", "MDI zurich Team Meeting") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Email") + .AddStringProperty("subject", "MDI zurich trip") + .AddStringProperty("body", "Let's travel to zurich") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + auto search_spec = std::make_unique(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("body:Zür"); + search_spec->set_search_type(GetParam()); + + auto result_spec = std::make_unique(); + result_spec->set_num_per_page(1); + result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec->mutable_snippet_spec()->set_num_matches_per_property(10); + result_spec->mutable_snippet_spec()->set_num_to_snippet(10); + + auto scoring_spec = std::make_unique(); + *scoring_spec = GetDefaultScoringSpec(); + + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + ASSERT_THAT(results.results(), SizeIs(1)); + + const DocumentProto& result_document_two = results.results(0).document(); + const SnippetProto& result_snippet_two = results.results(0).snippet(); + EXPECT_THAT(result_document_two, EqualsProto(document_two)); + EXPECT_THAT(result_snippet_two.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body")); + std::string_view 
content = GetString( + &result_document_two, result_snippet_two.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)), + ElementsAre("Let's travel to zurich")); + EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)), + ElementsAre("zurich")); + + search_spec.reset(); + scoring_spec.reset(); + result_spec.reset(); + + results = icing.GetNextPage(results.next_page_token()); + EXPECT_THAT(results.status(), ProtoIsOk()); + ASSERT_THAT(results.results(), SizeIs(1)); + + const DocumentProto& result_document_one = results.results(0).document(); + const SnippetProto& result_snippet_one = results.results(0).snippet(); + EXPECT_THAT(result_document_one, EqualsProto(document_one)); + EXPECT_THAT(result_snippet_one.entries(), SizeIs(1)); + EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body")); + content = GetString(&result_document_one, + result_snippet_one.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)), + ElementsAre("MDI zurich Team Meeting")); + EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)), + ElementsAre("zurich")); +} + +TEST_P(IcingSearchEngineSearchTest, Hyphens) { + // TODO(b/208654892): Fix issues with minus/hyphen chars. 
+ if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + GTEST_SKIP() + << "Advanced query doesn't properly support hyphens at this time."; + } + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaProto schema; + SchemaTypeConfigProto* type = schema.add_types(); + type->set_schema_type("MyType"); + PropertyConfigProto* prop = type->add_properties(); + prop->set_property_name("foo"); + prop->set_data_type(PropertyConfigProto::DataType::STRING); + prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + prop->mutable_string_indexing_config()->set_term_match_type( + TermMatchType::EXACT_ONLY); + prop->mutable_string_indexing_config()->set_tokenizer_type( + StringIndexingConfig::TokenizerType::PLAIN); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("MyType") + .AddStringProperty("foo", "foo bar-baz bat") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("MyType") + .AddStringProperty("foo", "bar for baz bat-man") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("foo:bar-baz"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + SearchResultProto results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + + EXPECT_THAT(results.status(), ProtoIsOk()); + ASSERT_THAT(results.results(), SizeIs(2)); + EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); + EXPECT_THAT(results.results(1).document(), 
EqualsProto(document_one)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionEmptyFieldPath) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query that will match those documents and use an empty field + // mask to request NO properties. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("hello"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + // Retrieve only one result at a time to make sure that projection works when + // retrieving all pages. 
+ result_spec.set_num_per_page(1); + TypePropertyMask* email_field_mask = result_spec.add_type_property_masks(); + email_field_mask->set_schema_type("Email"); + email_field_mask->add_paths(""); + + SearchResultProto results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that the returned results contain no properties. + DocumentProto projected_document_two = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .Build(); + EXPECT_THAT(results.results(0).document(), + EqualsProto(projected_document_two)); + + results = icing.GetNextPage(results.next_page_token()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + DocumentProto projected_document_one = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .Build(); + EXPECT_THAT(results.results(0).document(), + EqualsProto(projected_document_one)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query that will match those documents and request only + // 'sender.name' and 'subject' properties. + // Create all of search_spec, result_spec and scoring_spec as objects with + // scope that will end before the call to GetNextPage to ensure that the + // implementation isn't relying on references to any of them. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + + auto result_spec = std::make_unique<ResultSpecProto>(); + // Retrieve only one result at a time to make sure that projection works when + // retrieving all pages. + result_spec->set_num_per_page(1); + TypePropertyMask* email_field_mask = result_spec->add_type_property_masks(); + email_field_mask->set_schema_type("Email"); + email_field_mask->add_paths("sender.name"); + email_field_mask->add_paths("subject"); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that the first returned result only contains the 'sender.name' + // and 'subject' properties.
+ DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .Build(); + EXPECT_THAT(results.results(0).document(), + EqualsProto(projected_document_two)); + + // 4. Now, delete all of the specs used in the search. GetNextPage should have + // no problem because it shouldn't be keeping any references to them. + search_spec.reset(); + result_spec.reset(); + scoring_spec.reset(); + + // 5. Verify that the second returned result only contains the 'sender.name' + // property. + results = icing.GetNextPage(results.next_page_token()); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .Build()) + .AddStringProperty("subject", "Hello World!") + .Build(); + EXPECT_THAT(results.results(0).document(), + EqualsProto(projected_document_one)); +} + +TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { + auto fake_clock = std::make_unique(); + fake_clock->SetTimerElapsedMilliseconds(5); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique(), + std::make_unique(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates and inserts 5 documents + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + 
DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); + DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.add_namespace_filters("namespace"); + search_spec.add_schema_type_filters(document1.schema()); + search_spec.set_query("message"); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(2); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); + result_spec.mutable_snippet_spec()->set_num_to_snippet(3); + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + + // Searches and gets the first page, 2 results with 2 snippets + SearchResultProto search_result = + icing.Search(search_spec, scoring_spec, result_spec); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(2)); + ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken)); + + // Check the stats + QueryStatsProto exp_stats; + exp_stats.set_query_length(7); + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(1); + exp_stats.set_num_schema_types_filtered(1); + exp_stats.set_ranking_strategy( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + exp_stats.set_is_first_page(true); + exp_stats.set_requested_page_size(2); + exp_stats.set_num_results_returned_current_page(2); + exp_stats.set_num_documents_scored(5); + 
exp_stats.set_num_results_with_snippets(2); + exp_stats.set_latency_ms(5); + exp_stats.set_parse_query_latency_ms(5); + exp_stats.set_scoring_latency_ms(5); + exp_stats.set_ranking_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + exp_stats.set_lock_acquisition_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + // Second page, 2 result with 1 snippet + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(2)); + ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); + + exp_stats = QueryStatsProto(); + exp_stats.set_is_first_page(false); + exp_stats.set_requested_page_size(2); + exp_stats.set_num_results_returned_current_page(2); + exp_stats.set_num_results_with_snippets(1); + exp_stats.set_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + exp_stats.set_lock_acquisition_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + // Third page, 1 result with 0 snippets + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(1)); + ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); + + exp_stats = QueryStatsProto(); + exp_stats.set_is_first_page(false); + exp_stats.set_requested_page_size(2); + exp_stats.set_num_results_returned_current_page(1); + exp_stats.set_num_results_with_snippets(0); + exp_stats.set_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + exp_stats.set_lock_acquisition_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); +} + +TEST_P(IcingSearchEngineSearchTest, SnippetErrorTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + 
.AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetScore(10) + .SetSchema("Generic") + .AddStringProperty("subject", "I like cats", "I like dogs", + "I like birds", "I like fish") + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetScore(20) + .SetSchema("Generic") + .AddStringProperty("subject", "I like red", "I like green", + "I like blue", "I like yellow") + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetScore(5) + .SetSchema("Generic") + .AddStringProperty("subject", "I like cupcakes", "I like donuts", + "I like eclairs", "I like froyo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.add_schema_type_filters("Generic"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_query("like"); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_num_to_snippet(2); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(3); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4); + SearchResultProto search_results = + icing.Search(search_spec, scoring_spec, result_spec); + + ASSERT_THAT(search_results.results(), SizeIs(3)); + const SearchResultProto::ResultProto* result = &search_results.results(0); + EXPECT_THAT(result->document().uri(), Eq("uri2")); + 
ASSERT_THAT(result->snippet().entries(), SizeIs(3)); + const SnippetProto::EntryProto* entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), "subject[0]"); + std::string_view content = GetString(&result->document(), "subject[0]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(1); + EXPECT_THAT(entry->property_name(), "subject[1]"); + content = GetString(&result->document(), "subject[1]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(2); + EXPECT_THAT(entry->property_name(), "subject[2]"); + content = GetString(&result->document(), "subject[2]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + result = &search_results.results(1); + EXPECT_THAT(result->document().uri(), Eq("uri1")); + ASSERT_THAT(result->snippet().entries(), SizeIs(3)); + entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), "subject[0]"); + content = GetString(&result->document(), "subject[0]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(1); + ASSERT_THAT(entry->property_name(), "subject[1]"); + content = GetString(&result->document(), "subject[1]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + entry = &result->snippet().entries(2); + ASSERT_THAT(entry->property_name(), "subject[2]"); + content = GetString(&result->document(), "subject[2]"); + EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); + + result = &search_results.results(2); + ASSERT_THAT(result->document().uri(), Eq("uri3")); + ASSERT_THAT(result->snippet().entries(), IsEmpty()); +} + +TEST_P(IcingSearchEngineSearchTest, CJKSnippetTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // String: 
"我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF8 idx: 0 3 9 15 18 + // UTF16 idx: 0 1 3 5 6 + // Breaks into segments: "我", "每天", "走路", "去", "上班" + constexpr std::string_view kChinese = "我每天走路去上班。"; + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", kChinese) + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // Search and request snippet matching but no windowing. + SearchSpecProto search_spec; + search_spec.set_query("走"); + search_spec.set_term_match_type(TERM_MATCH_PREFIX); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_num_to_snippet( + std::numeric_limits::max()); + result_spec.mutable_snippet_spec()->set_num_matches_per_property( + std::numeric_limits::max()); + + // Search and make sure that we got a single successful result + SearchResultProto search_results = icing.Search( + search_spec, ScoringSpecProto::default_instance(), result_spec); + ASSERT_THAT(search_results.status(), ProtoIsOk()); + ASSERT_THAT(search_results.results(), SizeIs(1)); + const SearchResultProto::ResultProto* result = &search_results.results(0); + EXPECT_THAT(result->document().uri(), Eq("uri1")); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(result->snippet().entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), Eq("body")); + + // Get the content for "subject" and see what the match is. 
+ std::string_view content = GetString(&result->document(), "body"); + ASSERT_THAT(content, Eq(kChinese)); + + // Ensure that there is one and only one match within "body" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9)); + EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6)); + std::string_view match = + content.substr(match_proto.exact_match_byte_position(), + match_proto.exact_match_byte_length()); + ASSERT_THAT(match, Eq("走路")); + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3)); + EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); +} + +TEST_P(IcingSearchEngineSearchTest, InvalidToEmptyQueryTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // String: "Luca Brasi sleeps with the 🐟🐟🐟." + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF8 idx: 0 5 11 18 23 27 3135 39 + // UTF16 idx: 0 5 11 18 23 27 2931 33 + // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟" + // and "🐟". + constexpr std::string_view kSicilianMessage = + "Luca Brasi sleeps with the 🐟🐟🐟."; + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", kSicilianMessage) + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "Some other content.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec; + search_spec.set_query("?"); + search_spec.set_term_match_type(TERM_MATCH_PREFIX); + search_spec.set_search_type(GetParam()); + ScoringSpecProto scoring_spec; + ResultSpecProto result_spec; + + // Search and make sure that we got a single successful result + SearchResultProto search_results = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_results.status(), ProtoIsOk()); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + // This is the actual correct behavior. + EXPECT_THAT(search_results.results(), IsEmpty()); + } else { + EXPECT_THAT(search_results.results(), SizeIs(2)); + } + + search_spec.set_query("。"); + search_results = icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_results.status(), ProtoIsOk()); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + // This is the actual correct behavior. + EXPECT_THAT(search_results.results(), IsEmpty()); + } else { + EXPECT_THAT(search_results.results(), SizeIs(2)); + } + + search_spec.set_query("-"); + search_results = icing.Search(search_spec, scoring_spec, result_spec); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + // This is the actual correct behavior. + EXPECT_THAT(search_results.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + } else { + EXPECT_THAT(search_results.status(), ProtoIsOk()); + EXPECT_THAT(search_results.results(), SizeIs(2)); + } + + search_spec.set_query(":"); + search_results = icing.Search(search_spec, scoring_spec, result_spec); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + // This is the actual correct behavior. 
+ EXPECT_THAT(search_results.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + } else { + EXPECT_THAT(search_results.status(), ProtoIsOk()); + EXPECT_THAT(search_results.results(), SizeIs(2)); + } + + search_spec.set_query("OR"); + search_results = icing.Search(search_spec, scoring_spec, result_spec); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + EXPECT_THAT(search_results.status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + } else { + EXPECT_THAT(search_results.status(), ProtoIsOk()); + EXPECT_THAT(search_results.results(), SizeIs(2)); + } + + search_spec.set_query(" "); + search_results = icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(search_results.status(), ProtoIsOk()); + EXPECT_THAT(search_results.results(), SizeIs(2)); +} + +TEST_P(IcingSearchEngineSearchTest, EmojiSnippetTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // String: "Luca Brasi sleeps with the 🐟🐟🐟." + // ^ ^ ^ ^ ^ ^ ^ ^ ^ + // UTF8 idx: 0 5 11 18 23 27 3135 39 + // UTF16 idx: 0 5 11 18 23 27 2931 33 + // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟" + // and "🐟". + constexpr std::string_view kSicilianMessage = + "Luca Brasi sleeps with the 🐟🐟🐟."; + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", kSicilianMessage) + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "Some other content.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // Search and request snippet matching but no windowing. 
+ SearchSpecProto search_spec; + search_spec.set_query("🐟"); + search_spec.set_term_match_type(TERM_MATCH_PREFIX); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_num_to_snippet(1); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); + + // Search and make sure that we got a single successful result + SearchResultProto search_results = icing.Search( + search_spec, ScoringSpecProto::default_instance(), result_spec); + ASSERT_THAT(search_results.status(), ProtoIsOk()); + ASSERT_THAT(search_results.results(), SizeIs(1)); + const SearchResultProto::ResultProto* result = &search_results.results(0); + EXPECT_THAT(result->document().uri(), Eq("uri1")); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(result->snippet().entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), Eq("body")); + + // Get the content for "body" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body"); + ASSERT_THAT(content, Eq(kSicilianMessage)); + + // Ensure that there is one and only one match within "body" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + + EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27)); + EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4)); + std::string_view match = + content.substr(match_proto.exact_match_byte_position(), + match_proto.exact_match_byte_length()); + ASSERT_THAT(match, Eq("🐟")); + + // Ensure that the utf-16 values are also as expected + EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27)); + EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); +} + +TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person1 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person1") + .SetSchema("Person") + .AddStringProperty("firstName", "first1")
.AddStringProperty("lastName", "last1") + .AddStringProperty("emailAddress", "email1@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + DocumentProto person2 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person2") + .SetSchema("Person") + .AddStringProperty("firstName", "first2") + .AddStringProperty("lastName", "last2") + .AddStringProperty("emailAddress", "email2@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(2) + .Build(); + DocumentProto person3 = + DocumentBuilder() + .SetKey(R"(pkg$db/name#space\\)", "person3") + .SetSchema("Person") + .AddStringProperty("firstName", "first3") + .AddStringProperty("lastName", "last3") + .AddStringProperty("emailAddress", "email3@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + + DocumentProto email1 = + DocumentBuilder() + .SetKey("namespace", "email1") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 1") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + DocumentProto email2 = + DocumentBuilder() + .SetKey("namespace", "email2") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 2") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(2) + .Build(); + DocumentProto email3 = + DocumentBuilder() + .SetKey("namespace", "email3") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 3") + .AddStringProperty("personQualifiedId", + R"(pkg$db/name\#space\\\\#person3)") // escaped + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + 
ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk()); + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("firstName:first"); + search_spec.set_search_type(GetParam()); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_max_joined_child_count(100); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::MAX); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + nested_search_spec->set_search_type(GetParam()); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + + // Parent ScoringSpec + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + + // Parent ResultSpec + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + // Since we: + // - Use MAX for aggregation scoring strategy. + // - (Default) use DOCUMENT_SCORE to score child documents. + // - (Default) use DESC as the ranking order. + // + // person1 + email1 should have the highest aggregated score (3) and be + // returned first. person2 + email2 (aggregated score = 2) should be the + // second, and person3 + email3 (aggregated score = 1) should be the last. 
+ SearchResultProto expected_result1; + expected_result1.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_proto1 = + expected_result1.mutable_results()->Add(); + *result_proto1->mutable_document() = person1; + *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1; + + SearchResultProto expected_result2; + expected_result2.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_proto2 = + expected_result2.mutable_results()->Add(); + *result_proto2->mutable_document() = person2; + *result_proto2->mutable_joined_results()->Add()->mutable_document() = email2; + + SearchResultProto expected_result3; + expected_result3.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_proto3 = + expected_result3.mutable_results()->Add(); + *result_proto3->mutable_document() = person3; + *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3; + + SearchResultProto result1 = + icing.Search(search_spec, scoring_spec, result_spec); + uint64_t next_page_token = result1.next_page_token(); + EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); + expected_result1.set_next_page_token(next_page_token); + EXPECT_THAT(result1, + EqualsSearchResultIgnoreStatsAndScores(expected_result1)); + + SearchResultProto result2 = icing.GetNextPage(next_page_token); + next_page_token = result2.next_page_token(); + EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); + expected_result2.set_next_page_token(next_page_token); + EXPECT_THAT(result2, + EqualsSearchResultIgnoreStatsAndScores(expected_result2)); + + SearchResultProto result3 = icing.GetNextPage(next_page_token); + next_page_token = result3.next_page_token(); + EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken)); + EXPECT_THAT(result3, + EqualsSearchResultIgnoreStatsAndScores(expected_result3)); +} + +TEST_F(IcingSearchEngineSearchTest,
NumericFilterAdvancedQuerySucceeds) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create the schema and document store + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("transaction") + .AddProperty(PropertyConfigBuilder() + .SetName("price") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("cost") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document_one = DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("transaction") + .SetCreationTimestampMs(1) + .AddInt64Property("price", 10) + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = DocumentBuilder() + .SetKey("namespace", "2") + .SetSchema("transaction") + .SetCreationTimestampMs(1) + .AddInt64Property("price", 25) + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + DocumentProto document_three = DocumentBuilder() + .SetKey("namespace", "3") + .SetSchema("transaction") + .SetCreationTimestampMs(1) + .AddInt64Property("cost", 2) + .Build(); + ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("price < 20"); + search_spec.set_search_type( + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); + search_spec.add_enabled_features(std::string(kNumericSearchFeature)); + + SearchResultProto results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + ASSERT_THAT(results.results(), SizeIs(1)); + EXPECT_THAT(results.results(0).document(), EqualsProto(document_one)); + + search_spec.set_query("price == 25"); + results = icing.Search(search_spec, 
ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + ASSERT_THAT(results.results(), SizeIs(1)); + EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); + + search_spec.set_query("cost > 2"); + results = icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(results.results(), IsEmpty()); + + search_spec.set_query("cost >= 2"); + results = icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + ASSERT_THAT(results.results(), SizeIs(1)); + EXPECT_THAT(results.results(0).document(), EqualsProto(document_three)); + + search_spec.set_query("price <= 25"); + results = icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + ASSERT_THAT(results.results(), SizeIs(2)); + EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); + EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); +} + +TEST_F(IcingSearchEngineSearchTest, NumericFilterOldQueryFails) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create the schema and document store + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("transaction") + .AddProperty(PropertyConfigBuilder() + .SetName("price") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("cost") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document_one = DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("transaction") + .SetCreationTimestampMs(1) + .AddInt64Property("price", 10) + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two 
= DocumentBuilder() + .SetKey("namespace", "2") + .SetSchema("transaction") + .SetCreationTimestampMs(1) + .AddInt64Property("price", 25) + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + DocumentProto document_three = DocumentBuilder() + .SetKey("namespace", "3") + .SetSchema("transaction") + .SetCreationTimestampMs(1) + .AddInt64Property("cost", 2) + .Build(); + ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("price < 20"); + search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY); + search_spec.add_enabled_features(std::string(kNumericSearchFeature)); + + SearchResultProto results = + icing.Search(search_spec, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_P(IcingSearchEngineSearchTest, BarisNormalizationTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .SetCreationTimestampMs(1) + .AddStringProperty("name", "Barış") + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + DocumentProto document_two = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .SetCreationTimestampMs(1) + .AddStringProperty("name", "ıbar") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TERM_MATCH_PREFIX); + 
search_spec.set_search_type(GetParam()); + + ScoringSpecProto scoring_spec; + ResultSpecProto result_spec; + + SearchResultProto exp_results; + exp_results.mutable_status()->set_code(StatusProto::OK); + *exp_results.add_results()->mutable_document() = document; + + search_spec.set_query("barış"); + SearchResultProto results = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results)); + + search_spec.set_query("barıs"); + results = icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results)); + + search_spec.set_query("baris"); + results = icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results)); + + SearchResultProto exp_results2; + exp_results2.mutable_status()->set_code(StatusProto::OK); + *exp_results2.add_results()->mutable_document() = document_two; + search_spec.set_query("ı"); + results = icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(results, EqualsSearchResultIgnoreStatsAndScores(exp_results2)); +} + +TEST_P(IcingSearchEngineSearchTest, LatinSnippetTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + constexpr std::string_view kLatin = "test ḞÖÖḸĬŞĤ test"; + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", kLatin) + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("foo"); + search_spec.set_term_match_type(TERM_MATCH_PREFIX); + search_spec.set_search_type(GetParam()); + + ResultSpecProto result_spec; + result_spec.mutable_snippet_spec()->set_num_to_snippet( + std::numeric_limits<int32_t>::max()); +
result_spec.mutable_snippet_spec()->set_num_matches_per_property( + std::numeric_limits<int32_t>::max()); + + // Search and make sure that we got a single successful result + SearchResultProto search_results = icing.Search( + search_spec, ScoringSpecProto::default_instance(), result_spec); + ASSERT_THAT(search_results.status(), ProtoIsOk()); + ASSERT_THAT(search_results.results(), SizeIs(1)); + const SearchResultProto::ResultProto* result = &search_results.results(0); + EXPECT_THAT(result->document().uri(), Eq("uri1")); + + // Ensure that one and only one property was matched and it was "body" + ASSERT_THAT(result->snippet().entries(), SizeIs(1)); + const SnippetProto::EntryProto* entry = &result->snippet().entries(0); + EXPECT_THAT(entry->property_name(), Eq("body")); + + // Ensure that there is one and only one match within "body" + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + + // Check that the submatch (submatch_byte_length bytes of the match) is "ḞÖÖ". + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + std::string_view match = + kLatin.substr(match_proto.exact_match_byte_position(), + match_proto.submatch_byte_length()); + ASSERT_THAT(match, Eq("ḞÖÖ")); +} + +INSTANTIATE_TEST_SUITE_P( + IcingSearchEngineSearchTest, IcingSearchEngineSearchTest, + testing::Values( + SearchSpecProto::SearchType::ICING_RAW_QUERY, + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY)); + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_suggest_test.cc b/icing/icing-search-engine_suggest_test.cc new file mode 100644 index 0000000..dbd0a11 --- /dev/null +++ b/icing/icing-search-engine_suggest_test.cc @@ -0,0 +1,1304 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/icing-search-engine.h" + +#include <cstdint> +#include <limits> +#include <memory> +#include <string> +#include <utility> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/jni/jni-cache.h" +#include "icing/portable/endian.h" +#include "icing/portable/equals-proto.h" +#include "icing/portable/platform.h" +#include "icing/proto/debug.pb.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" +#include "icing/proto/initialize.pb.h" +#include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/persist.pb.h" +#include "icing/proto/reset.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/status.pb.h" +#include "icing/proto/storage.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/proto/usage.pb.h" +#include "icing/schema-builder.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::UnorderedElementsAre; + +// For mocking purpose, we allow tests to provide a custom
Filesystem. +class TestIcingSearchEngine : public IcingSearchEngine { + public: + TestIcingSearchEngine(const IcingSearchEngineOptions& options, + std::unique_ptr<const Filesystem> filesystem, + std::unique_ptr<const IcingFilesystem> icing_filesystem, + std::unique_ptr<Clock> clock, + std::unique_ptr<const JniCache> jni_cache) + : IcingSearchEngine(options, std::move(filesystem), + std::move(icing_filesystem), std::move(clock), + std::move(jni_cache)) {} +}; + +std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } + +// This test is meant to cover all tests relating to IcingSearchEngine::Search +// and IcingSearchEngine::SearchSuggestions. +class IcingSearchEngineSuggestTest : public testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + // If we've specified using the reverse-JNI method for segmentation (i.e. + // not ICU), then we won't have the ICU data file included to set up. + // Technically, we could choose to use reverse-JNI for segmentation AND + // include an ICU data file, but that seems unlikely and our current BUILD + // setup doesn't do this. + // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path = + GetTestFilePath("icing/icu.dat"); + ICING_ASSERT_OK( + icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); + } + filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); + } + + const Filesystem* filesystem() const { return &filesystem_; } + + private: + Filesystem filesystem_; +}; + +IcingSearchEngineOptions GetDefaultIcingOptions() { + IcingSearchEngineOptions icing_options; + icing_options.set_base_dir(GetTestBaseDir()); + return icing_options; +} + +SchemaProto CreatePersonAndEmailSchema() { + return SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); +} + +TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // Creates and inserts 6 documents, and index 6 termSix, 5 termFive, 4 + // 
termFour, 3 termThree, 2 termTwo and one termOne. + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty( + "subject", "termOne termTwo termThree termFour termFive termSix") + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", + "termTwo termThree termFour termFive termSix") + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termThree termFour termFive termSix") + .Build(); + DocumentProto document4 = + DocumentBuilder() + .SetKey("namespace", "uri4") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termFour termFive termSix") + .Build(); + DocumentProto document5 = + DocumentBuilder() + .SetKey("namespace", "uri5") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termFive termSix") + .Build(); + DocumentProto document6 = DocumentBuilder() + .SetKey("namespace", "uri6") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termSix") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("t"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + 
// Query all suggestions, and they will be ranked. + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions().at(0).query(), "termsix"); + ASSERT_THAT(response.suggestions().at(1).query(), "termfive"); + ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); + ASSERT_THAT(response.suggestions().at(3).query(), "termthree"); + ASSERT_THAT(response.suggestions().at(4).query(), "termtwo"); + ASSERT_THAT(response.suggestions().at(5).query(), "termone"); + + // Query first three suggestions, and they will be ranked. + suggestion_spec.set_num_to_return(3); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions().at(0).query(), "termsix"); + ASSERT_THAT(response.suggestions().at(1).query(), "termfive"); + ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInOneNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace1 has 2 results. 
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInMultipleNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fo") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace3", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace2 and namespace3 has 2 results. 
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace2"); + suggestion_spec.add_namespace_filters("namespace3"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_NamespaceNotFound) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fo") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // Search for non-exist namespace3 + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace3"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + 
EXPECT_THAT(response.status().code(), Eq(StatusProto::OK)); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // Index 4 documents, + // namespace1 has 2 hits for term one + // namespace2 has 2 hits for term two and 1 hit for term one. + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termone") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termone") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termone termtwo") + .Build(); + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termtwo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionTermOne; + suggestionTermOne.set_query("termone"); + SuggestionResponse::Suggestion suggestionTermTwo; + suggestionTermTwo.set_query("termtwo"); + + // Only search suggestions for namespace2. The correct order should be + // {"termtwo", "termone"}. If we're not filtering out namespace1 when + // calculating our score, then it will be {"termone", "termtwo"}.
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("t"); + suggestion_spec.add_namespace_filters("namespace2"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + ElementsAre(EqualsProto(suggestionTermTwo), + EqualsProto(suggestionTermOne))); +} + +TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_DeletionTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace1 has this suggestion + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + SuggestionResponse response = 
icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // namespace2 has this suggestion + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // delete document from namespace 1 + EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk()); + + // Now namespace1 will return empty + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace1"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); + + // namespace2 still has this suggestion, so we can prove the reason of + // namespace 1 cannot find it is we filter it out, not it doesn't exist. 
+ suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInOneDocument) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + + // Only search in namespace1,uri1 + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + NamespaceDocumentUriGroup* namespace1_uri1 = + suggestion_spec.add_document_uri_filters(); + namespace1_uri1->set_namespace_("namespace1"); + namespace1_uri1->add_document_uris("uri1"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // 
Only search in namespace1,uri2 + suggestion_spec.clear_document_uri_filters(); + NamespaceDocumentUriGroup* namespace1_uri2 = + suggestion_spec.add_document_uri_filters(); + namespace1_uri2->set_namespace_("namespace1"); + namespace1_uri2->add_document_uris("uri2"); + + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInMultipleDocument) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace1", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + + // Only search document in namespace1,uri1 and namespace1,uri2 + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( +
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + NamespaceDocumentUriGroup* namespace1_uri1_uri2 = + suggestion_spec.add_document_uri_filters(); + namespace1_uri1_uri2->set_namespace_("namespace1"); + namespace1_uri1_uri2->add_document_uris("uri1"); + namespace1_uri1_uri2->add_document_uris("uri2"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool), + EqualsProto(suggestionFoo))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace3", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + + // Only search document in namespace1,uri1 and all documents under namespace2 + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + 
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.add_namespace_filters("namespace2"); + NamespaceDocumentUriGroup* namespace1_uri1 = + suggestion_spec.add_document_uri_filters(); + namespace1_uri1->set_namespace_("namespace1"); + namespace1_uri1->add_document_uris("uri1"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool), + EqualsProto(suggestionFoo))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_DocumentIdDoesntExist) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // Search for a non-exist document id : namespace3,uri3 + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + 
suggestion_spec.add_namespace_filters("namespace3"); + NamespaceDocumentUriGroup* namespace3_uri3 = + suggestion_spec.add_document_uri_filters(); + namespace3_uri3->set_namespace_("namespace3"); + namespace3_uri3->add_document_uris("uri3"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + // Search for the document namespace1,uri1 with namespace filter in + // namespace2. 
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + NamespaceDocumentUriGroup* namespace1_uri1 = + suggestion_spec.add_document_uri_filters(); + namespace1_uri1->set_namespace_("namespace1"); + namespace1_uri1->add_document_uris("uri1"); + suggestion_spec.add_namespace_filters("namespace2"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_EmptyDocumentIdInNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // Give empty document uris in namespace 1 + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + NamespaceDocumentUriGroup* namespace1_uri1 = + suggestion_spec.add_document_uri_filters(); + namespace1_uri1->set_namespace_("namespace1"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT)); +} + 
+TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .AddDocumentProperty("sender", DocumentBuilder() + .SetKey("namespace", "uri1-sender") + .SetSchema("Person") + .AddStringProperty("name", "foo") + .Build()) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Message") + .SetCreationTimestampMs(10) + .AddStringProperty("body", "fo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + + SuggestionSpecProto 
suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + suggestion_spec.add_schema_type_filters("Email"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_SchemaTypeNotFound) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .SetCreationTimestampMs(10) + .AddStringProperty("body", "fo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + suggestion_spec.add_schema_type_filters("Email"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); +} + 
+TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_ShouldReturnInDesiredProperty) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri1-sender") + .SetSchema("Person") + .AddStringProperty("name", "foo") + .AddStringProperty("emailAddress", "fo") + .Build()) + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + // Only search in subject. 
+ TypePropertyMask* mask = suggestion_spec.add_type_property_filters(); + mask->set_schema_type("Email"); + mask->add_paths("subject"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // Search in subject and sender.name + suggestion_spec.clear_type_property_filters(); + mask = suggestion_spec.add_type_property_filters(); + mask->set_schema_type("Email"); + mask->add_paths("subject"); + mask->add_paths("sender.name"); + + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_NestedPropertyReturnNothing) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .AddDocumentProperty("sender", DocumentBuilder() + .SetKey("namespace", "uri1-sender") + .SetSchema("Person") + .AddStringProperty("name", "foo") + .Build()) + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + // Only search in Person.name. 
+ suggestion_spec.add_schema_type_filters("Person"); + TypePropertyMask* mask = suggestion_spec.add_type_property_filters(); + mask->set_schema_type("Person"); + mask->add_paths("name"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_PropertyFilterAndSchemaFilter) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .AddDocumentProperty("sender", DocumentBuilder() + .SetKey("namespace", "uri1-sender") + .SetSchema("Person") + .AddStringProperty("name", "foo") + .Build()) + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Message") + .SetCreationTimestampMs(10) 
+ .AddStringProperty("body", "fo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + SuggestionResponse::Suggestion suggestionFo; + suggestionFo.set_query("fo"); + + // Search in sender.name of Email and everything in Message. + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + suggestion_spec.add_schema_type_filters("Email"); + suggestion_spec.add_schema_type_filters("Message"); + TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters(); + mask1->set_schema_type("Email"); + mask1->add_paths("sender.name"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFo))); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + 
.AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .SetCreationTimestampMs(10) + .AddStringProperty("body", "fo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // Search in sender.name of Email but schema type is Message. + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + suggestion_spec.add_schema_type_filters("Message"); + TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters(); + mask1->set_schema_type("Email"); + mask1->add_paths("sender.name"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_OrderByTermFrequency) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + DocumentProto document1 = + DocumentBuilder() + 
.SetKey("namespace1", "uri1") + .SetSchema("Message") + .SetCreationTimestampMs(10) + .AddStringProperty( + "body", "termthree termthree termthree termtwo termtwo termone") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + + // Suggest for prefix "t" and rank the results by term frequency. + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("t"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::EXACT_ONLY); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY); + + SuggestionResponse::Suggestion suggestionTermOne; + suggestionTermOne.set_query("termone"); + SuggestionResponse::Suggestion suggestionTermTwo; + suggestionTermTwo.set_query("termtwo"); + SuggestionResponse::Suggestion suggestionTermThree; + suggestionTermThree.set_query("termthree"); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + ElementsAre(EqualsProto(suggestionTermThree), + EqualsProto(suggestionTermTwo), + EqualsProto(suggestionTermOne))); +} + +TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_ExpiredTest) { + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(100) + .SetTtlMs(500) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .AddStringProperty("subject", "fool") + .Build(); + { + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetSystemTimeMilliseconds(400); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + 
ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace1 has this suggestion + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // namespace2 has this suggestion + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + } + // We reinitialize here so we can feed in a fake clock this time + { + // Time needs to be past document1 creation time (100) + ttl (500) for it + // to count as "expired". document2 is not expired since its ttl is 1000. 
+ auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetSystemTimeMilliseconds(800); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + // Now namespace1 will return empty + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace1"); + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); + + // namespace2 still has this suggestion + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + } +} + +TEST_F(IcingSearchEngineSuggestTest, SearchSuggestionsTest_emptyPrefix) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix(""); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), + 
ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineSuggestTest, + SearchSuggestionsTest_NonPositiveNumToReturn) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("prefix"); + suggestion_spec.set_num_to_return(0); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + suggestion_spec.mutable_scoring_spec()->set_rank_by( + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + + ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 5ec78a0..e0070e0 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -27,8 +27,6 @@ #include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" #include "icing/jni/jni-cache.h" -#include "icing/join/join-processor.h" -#include "icing/legacy/index/icing-mock-filesystem.h" #include "icing/portable/endian.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" @@ -47,19 +45,13 @@ #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/proto/usage.pb.h" -#include "icing/query/query-features.h" #include "icing/schema-builder.h" -#include "icing/schema/schema-store.h" -#include "icing/schema/section.h" -#include "icing/store/document-log-creator.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" -#include "icing/testing/random-string.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" -#include "icing/util/snippet-helpers.h" namespace icing { namespace 
lib { @@ -67,54 +59,16 @@ namespace lib { namespace { using ::icing::lib::portable_equals_proto::EqualsProto; -using ::testing::_; -using ::testing::ElementsAre; using ::testing::Eq; using ::testing::Ge; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsEmpty; -using ::testing::Le; -using ::testing::Lt; -using ::testing::Matcher; -using ::testing::Ne; using ::testing::Return; using ::testing::SizeIs; using ::testing::StrEq; using ::testing::UnorderedElementsAre; -constexpr std::string_view kIpsumText = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis " - "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida " - "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam " - "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo " - "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, " - "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula " - "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et " - "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, " - "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis " - "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. " - "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. " - "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur " - "vehicula posuere vitae, convallis eu lorem. 
Donec semper augue eu nibh " - "placerat semper."; - -PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( - Filesystem filesystem, const std::string& file_path) { - PortableFileBackedProtoLog<DocumentWrapper>::Header header; - filesystem.PRead(file_path.c_str(), &header, - sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header), - /*offset=*/0); - return header; -} - -void WriteDocumentLogHeader( - Filesystem filesystem, const std::string& file_path, - PortableFileBackedProtoLog<DocumentWrapper>::Header& header) { - filesystem.Write(file_path.c_str(), &header, - sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header)); -} - // For mocking purpose, we allow tests to provide a custom Filesystem. class TestIcingSearchEngine : public IcingSearchEngine { public: @@ -130,6 +84,8 @@ class TestIcingSearchEngine : public IcingSearchEngine { std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; } +// This test is meant to cover all tests relating to IcingSearchEngine apis not +// specifically covered by the other IcingSearchEngine*Test. 
class IcingSearchEngineTest : public testing::Test { protected: void SetUp() override { @@ -158,21 +114,9 @@ class IcingSearchEngineTest : public testing::Test { Filesystem filesystem_; }; -constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1; - // Non-zero value so we don't override it to be the current time constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; -std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; } - -std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; } - -std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; } - -std::string GetHeaderFilename() { - return GetTestBaseDir() + "/icing_search_engine_header"; -} - IcingSearchEngineOptions GetDefaultIcingOptions() { IcingSearchEngineOptions icing_options; icing_options.set_base_dir(GetTestBaseDir()); @@ -188,19 +132,6 @@ DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { .Build(); } -DocumentProto CreateEmailDocument(const std::string& name_space, - const std::string& uri, int score, - const std::string& subject_content, - const std::string& body_content) { - return DocumentBuilder() - .SetKey(name_space, uri) - .SetSchema("Email") - .SetScore(score) - .AddStringProperty("subject", subject_content) - .AddStringProperty("body", body_content) - .Build(); -} - SchemaProto CreateMessageSchema() { return SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( @@ -211,23 +142,6 @@ SchemaProto CreateMessageSchema() { .Build(); } -SchemaProto CreateEmailSchema() { - return SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .Build(); -} - SchemaProto 
CreatePersonAndEmailSchema() { return SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -280,9912 +194,675 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri, return usage_report; } -std::vector GetUrisFromSearchResults( - SearchResultProto& search_result_proto) { - std::vector result_uris; - result_uris.reserve(search_result_proto.results_size()); - for (int i = 0; i < search_result_proto.results_size(); i++) { - result_uris.push_back( - search_result_proto.mutable_results(i)->document().uri()); - } - return result_uris; -} - -TEST_F(IcingSearchEngineTest, SimpleInitialization) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, InitializingAgainSavesNonPersistedData) { +TEST_F(IcingSearchEngineTest, GetDocument) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + // Simple put and get + ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), + ProtoIsOk()); GetResultProto expected_get_result_proto; expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - + *expected_get_result_proto.mutable_document() = + CreateMessageDocument("namespace", "uri"); ASSERT_THAT( icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), EqualsProto(expected_get_result_proto)); - 
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(std::numeric_limits::max()); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), + // Put an invalid document + PutResultProto put_result_proto = icing.Put(DocumentProto()); + EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} + EXPECT_THAT(put_result_proto.status().message(), + HasSubstr("'namespace' is empty")); -TEST_F(IcingSearchEngineTest, NegativeMergeSizeReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(-1); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + // Get a non-existing key + expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); + expected_get_result_proto.mutable_status()->set_message( + "Document (wrong, uri) not found."); + expected_get_result_proto.clear_document(); + ASSERT_THAT(icing.Get("wrong", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); } -TEST_F(IcingSearchEngineTest, ZeroMergeSizeReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(0); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} +TEST_F(IcingSearchEngineTest, GetDocumentProjectionEmpty) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + 
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); -TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - // One is fine, if a bit weird. It just means that the lite index will be - // smaller and will request a merge any time content is added to it. - options.set_index_merge_size(1); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); -} + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); -TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_max_token_length(-1); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} + GetResultSpecProto result_spec; + TypePropertyMask* mask = result_spec.add_type_property_masks(); + mask->set_schema_type(document.schema()); + mask->add_paths(""); -TEST_F(IcingSearchEngineTest, ZeroMaxTokenLenReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_max_token_length(0); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; + expected_get_result_proto.mutable_document()->clear_properties(); + ASSERT_THAT(icing.Get("namespace", "uri", result_spec), + EqualsProto(expected_get_result_proto)); } -TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - // A length of 1 is allowed - even though it would be strange to want - // 
this. - options.set_max_token_length(1); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); +TEST_F(IcingSearchEngineTest, GetDocumentWildCardProjectionEmpty) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); DocumentProto document = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // "message" should have been truncated to "m" - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - // The indexed tokens were truncated to length of 1, so "m" will match - search_spec.set_query("m"); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - // The query token is also truncated to length of 1, so "me"->"m" matches "m" - search_spec.set_query("me"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); + GetResultSpecProto result_spec; + TypePropertyMask* mask = result_spec.add_type_property_masks(); + mask->set_schema_type("*"); + mask->add_paths(""); - // The query token is still truncated to length of 1, so "massage"->"m" - // matches "m" - search_spec.set_query("massage"); - 
actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; + expected_get_result_proto.mutable_document()->clear_properties(); + ASSERT_THAT(icing.Get("namespace", "uri", result_spec), + EqualsProto(expected_get_result_proto)); } -TEST_F(IcingSearchEngineTest, - MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - // Set token length to max. This is allowed (it just means never to - // truncate tokens). However, this does mean that tokens that exceed the - // size of the lexicon will cause indexing to fail. - options.set_max_token_length(std::numeric_limits::max()); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); +TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleFieldPaths) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); - // Add a document that just barely fits under the max document limit. - // This will still fail to index because we won't actually have enough - // room in the lexicon to fit this content. - std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p'); + // 1. 
Add an email document DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Message") - .AddStringProperty("body", std::move(enormous_string)) + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); - EXPECT_THAT(icing.Put(document).status(), - ProtoStatusIs(StatusProto::OUT_OF_SPACE)); - - SearchSpecProto search_spec; - search_spec.set_query("p"); - search_spec.set_term_match_type(TermMatchType::PREFIX); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, FailToCreateDocStore) { - auto mock_filesystem = std::make_unique(); - // This fails DocumentStore::Create() - ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_)) - .WillByDefault(Return(false)); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); + GetResultSpecProto result_spec; + TypePropertyMask* mask = result_spec.add_type_property_masks(); + mask->set_schema_type("Email"); + mask->add_paths("sender.name"); + mask->add_paths("subject"); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), - 
ProtoStatusIs(StatusProto::INTERNAL)); - EXPECT_THAT(initialize_result_proto.status().message(), - HasSubstr("Could not create directory")); + // 2. Verify that the returned result only contains the 'sender.name' + // property and the 'subject' property. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .Build()) + .AddStringProperty("subject", "Hello World!") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), + EqualsProto(expected_get_result_proto)); } -TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresAtThreshold) { - Filesystem filesystem; - DocumentProto email1 = - CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); - email1.set_creation_timestamp_ms(10000); - DocumentProto email2 = - CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); - email2.set_creation_timestamp_ms(10000); - - { - // Create an index with a few documents. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - } - - // Write an init marker file with 5 previously failed attempts. 
- std::string marker_filepath = GetTestBaseDir() + "/init_marker"; +TEST_F(IcingSearchEngineTest, GetDocumentWildcardProjectionMultipleFieldPaths) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); - { - ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); - int network_init_attempts = GHostToNetworkL(5); - // Write the updated number of attempts before we get started. - ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, - &network_init_attempts, - sizeof(network_init_attempts))); - ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); - } + // 1. Add an email document + DocumentProto document = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - { - // Create the index again and verify that initialization succeeds and no - // data is thrown out. 
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(5)); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(email1)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(email2)); - } + GetResultSpecProto result_spec; + TypePropertyMask* mask = result_spec.add_type_property_masks(); + mask->set_schema_type("*"); + mask->add_paths("sender.name"); + mask->add_paths("subject"); - // The successful init should have thrown out the marker file. - ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); + // 2. Verify that the returned result only contains the 'sender.name' + // property and the 'subject' property. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .Build()) + .AddStringProperty("subject", "Hello World!") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), + EqualsProto(expected_get_result_proto)); } -TEST_F(IcingSearchEngineTest, InitMarkerFilePreviousFailuresBeyondThreshold) { - Filesystem filesystem; - DocumentProto email1 = - CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); - DocumentProto email2 = - CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); - - { - // Create an index with a few documents. 
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - } - - // Write an init marker file with 6 previously failed attempts. - std::string marker_filepath = GetTestBaseDir() + "/init_marker"; +TEST_F(IcingSearchEngineTest, + GetDocumentSpecificProjectionOverridesWildcardProjection) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); - { - ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str())); - int network_init_attempts = GHostToNetworkL(6); - // Write the updated number of attempts before we get started. - ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0, - &network_init_attempts, - sizeof(network_init_attempts))); - ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get())); - } + // 1. Add an email document + DocumentProto document = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - { - // Create the index again and verify that initialization succeeds and all - // data is thrown out. 
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), - ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(6)); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - } + // 2. Add type property masks for the wildcard and the specific type of the + // document 'Email'. The wildcard should be ignored and only the 'Email' + // projection should apply. + GetResultSpecProto result_spec; + TypePropertyMask* mask = result_spec.add_type_property_masks(); + mask->set_schema_type("*"); + mask->add_paths("subject"); + mask = result_spec.add_type_property_masks(); + mask->set_schema_type("Email"); + mask->add_paths("body"); - // The successful init should have thrown out the marker file. - ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); + // 3. Verify that the returned result only contains the 'body' property. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), + EqualsProto(expected_get_result_proto)); } -TEST_F(IcingSearchEngineTest, SuccessiveInitFailuresIncrementsInitMarker) { - Filesystem filesystem; - DocumentProto email1 = - CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); - DocumentProto email2 = - CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); +TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - { - // 1. Create an index with a few documents. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoIsOk()); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - } + // Creates 3 test documents + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace", "uri/1") + .SetSchema("Message") + .AddStringProperty("body", "message1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace", "uri/2") + .SetSchema("Message") + .AddStringProperty("body", "message2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); - { - // 2. Create an index that will encounter an IO failure when trying to - // create the document log. 
- IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); - - auto mock_filesystem = std::make_unique(); - std::string document_log_filepath = - icing_options.base_dir() + "/document_dir/document_log_v1"; - auto get_filesize_lambda = [this, - &document_log_filepath](const char* filename) { - if (strncmp(document_log_filepath.c_str(), filename, - document_log_filepath.length()) == 0) { - return Filesystem::kBadFileSize; - } - return this->filesystem()->GetFileSize(filename); - }; - ON_CALL(*mock_filesystem, GetFileSize(A())) - .WillByDefault(get_filesize_lambda); - - TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), - GetTestJniCache()); - - // Fail to initialize six times in a row. - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(0)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(1)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(2)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(3)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(4)); - - init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(5)); - } + 
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - { - // 3. Create the index again and verify that initialization succeeds and all - // data is thrown out. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - ASSERT_THAT(init_result.status(), - ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(), - Eq(6)); - - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - } + // Report usage for doc1 and doc2. The older timestamp 5000 shouldn't be + // overridden by 1000. The order will be doc1 > doc2 when ranked by + // USAGE_TYPE1_LAST_USED_TIMESTAMP. + UsageReport usage_report_doc1_time1 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE1); + UsageReport usage_report_doc1_time5 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/5000, + UsageReport::USAGE_TYPE1); + UsageReport usage_report_doc2_time3 = CreateUsageReport( + /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/3000, + UsageReport::USAGE_TYPE1); + ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time5).status(), ProtoIsOk()); + ASSERT_THAT(icing.ReportUsage(usage_report_doc2_time3).status(), ProtoIsOk()); + ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time1).status(), ProtoIsOk()); - // The successful init should have thrown out the marker file. 
- std::string marker_filepath = GetTestBaseDir() + "/init_marker"; - ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str())); -} + // "m" will match both documents + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("m"); -TEST_F(IcingSearchEngineTest, - CircularReferenceCreateSectionManagerReturnsInvalidArgument) { - // Create a type config with a circular reference. - SchemaProto schema; - auto* type = schema.add_types(); - type->set_schema_type("Message"); - - auto* body = type->add_properties(); - body->set_property_name("recipient"); - body->set_schema_type("Person"); - body->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_document_indexing_config()->set_index_nested_properties(true); - - type = schema.add_types(); - type->set_schema_type("Person"); - - body = type->add_properties(); - body->set_property_name("recipient"); - body->set_schema_type("Message"); - body->set_data_type(PropertyConfigProto::DataType::DOCUMENT); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_document_indexing_config()->set_index_nested_properties(true); + // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document1; + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document2; - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(schema).status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); + 
SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, PutWithoutSchemaFailedPrecondition) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - +TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) { DocumentProto document = CreateMessageDocument("namespace", "uri"); - PutResultProto put_result_proto = icing.Put(document); - EXPECT_THAT(put_result_proto.status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set")); -} + { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + } // Destructing calls a PersistToDisk(FULL) -TEST_F(IcingSearchEngineTest, FailToReadSchema) { - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - { - // Successfully initialize and set a schema - IcingSearchEngine icing(icing_options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - } + // There should be no recovery since everything should be saved properly. 
+ InitializeResultProto init_result = icing.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); - auto mock_filesystem = std::make_unique(); + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - // This fails FileBackedProto::Read() when we try to check the schema we - // had previously set - ON_CALL(*mock_filesystem, - OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb"))) - .WillByDefault(Return(-1)); - - TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), - GetTestJniCache()); - - InitializeResultProto initialize_result_proto = test_icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), - ProtoStatusIs(StatusProto::INTERNAL)); - EXPECT_THAT(initialize_result_proto.status().message(), - HasSubstr("Unable to open file for read")); -} + EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); -TEST_F(IcingSearchEngineTest, FailToWriteSchema) { - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); + // Documents are still intact. 
+ GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; - auto mock_filesystem = std::make_unique(); - // This fails FileBackedProto::Write() - ON_CALL(*mock_filesystem, OpenForWrite(HasSubstr("schema.pb"))) - .WillByDefault(Return(-1)); + EXPECT_THAT( + icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); - TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); + // Index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; - SetSchemaResultProto set_schema_result_proto = - icing.SetSchema(CreateMessageSchema()); - EXPECT_THAT(set_schema_result_proto.status(), - ProtoStatusIs(StatusProto::INTERNAL)); - EXPECT_THAT(set_schema_result_proto.status().message(), - HasSubstr("Unable to open file for write")); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleFails) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 1. 
Create a schema with an Email type with properties { "title", "body"} - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - // 2. Add an email document - DocumentProto doc = DocumentBuilder() - .SetKey("emails", "email#1") - .SetSchema("Email") - .AddStringProperty("title", "Hello world.") - .AddStringProperty("body", "Goodnight Moon.") - .Build(); - EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk()); - } +TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + // Add schema and documents to our first icing1 instance. + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); - // 3. Set a schema that deletes email. This should fail. 
- SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Message"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT( - icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false) - .status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - - // 4. Try to delete by email type. This should succeed because email wasn't - // deleted in step 3. - EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk()); - } -} + // Initialize a second icing2 instance which should have its own memory + // space. If data from icing1 isn't being persisted to the files, then icing2 + // won't be able to see those changes. + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); -TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleForceOverrideSucceeds) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + // There should be no recovery since everything should be saved properly. + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); - // 1.
Create a schema with an Email type with properties { "title", "body"} - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - // 2. Add an email document - DocumentProto doc = DocumentBuilder() - .SetKey("emails", "email#1") - .SetSchema("Email") - .AddStringProperty("title", "Hello world.") - .AddStringProperty("body", "Goodnight Moon.") - .Build(); - EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk()); - } + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing2.GetSchema(), + EqualsProto(expected_get_schema_result_proto)); - // 3. Set a schema that deletes email with force override. This should - // succeed and delete the email type. - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Message"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); - - // 4. 
Try to delete by email type. This should fail because email was - // already deleted. - EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - } -} + // Documents are still intact. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document; -TEST_F(IcingSearchEngineTest, SetSchemaUnsetVersionIsZero) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); - // 1. Create a schema with an Email type with version 1 - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); + // Index is still intact. + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. 
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0)); + SearchResultProto actual_results = + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 1. Create a schema with an Email type with version 1 - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(1); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - SetSchemaResultProto set_schema_result = icing.SetSchema(schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 2. Create schema that adds a new optional property and updates version. 
- SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(2); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // 3. SetSchema should succeed and the version number should be updated. - SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_fully_compatible_changed_schema_types() - ->Add("Email"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); - } -} - -TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleVersionUpdateFails) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 1. 
Create a schema with an Email type with version 1 - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(1); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED) - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(2); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - // 3. SetSchema should fail and the version number should NOT be updated. - EXPECT_THAT(icing.SetSchema(schema).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); - } -} - -TEST_F(IcingSearchEngineTest, - SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 1. 
Create a schema with an Email type with version 1 - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(1); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED) - // with force override to true. - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(2); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - // 3. SetSchema should succeed and the version number should be updated. - EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk()); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); - } -} - -TEST_F(IcingSearchEngineTest, SetSchemaNoChangeVersionUpdateSucceeds) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 1. 
Create a schema with an Email type with version 1 - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(1); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1)); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 2. Create schema that only changes the version. - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_version(2); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // 3. SetSchema should succeed and the version number should be updated. 
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2)); - } -} - -TEST_F(IcingSearchEngineTest, SetSchemaDuplicateTypesReturnsAlreadyExists) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Create a schema with types { "Email", "Message" and "Email" } - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - type = schema.add_types(); - type->set_schema_type("Message"); - property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - *schema.add_types() = schema.types(0); - - EXPECT_THAT(icing.SetSchema(schema).status(), - ProtoStatusIs(StatusProto::ALREADY_EXISTS)); -} - -TEST_F(IcingSearchEngineTest, - SetSchemaDuplicatePropertiesReturnsAlreadyExists) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Create a schema with an Email type with properties { "title", "body" and - // "title" } - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - 
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), - ProtoStatusIs(StatusProto::ALREADY_EXISTS)); -} - -TEST_F(IcingSearchEngineTest, SetSchema) { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(1000); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); +TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) { + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + DocumentProto document = CreateMessageDocument("namespace", "uri"); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT( + icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document)); - auto message_document = CreateMessageDocument("namespace", "uri"); - - auto schema_with_message = CreateMessageSchema(); - - SchemaProto schema_with_email; - SchemaTypeConfigProto* type = schema_with_email.add_types(); - type->set_schema_type("Email"); - PropertyConfigProto* property = type->add_properties(); - property->set_property_name("title"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - SchemaProto schema_with_email_and_message = schema_with_email; - type = schema_with_email_and_message.add_types(); - type->set_schema_type("Message"); - property = type->add_properties(); - property->set_property_name("body"); - 
property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // Create an arbitrary invalid schema - SchemaProto invalid_schema; - SchemaTypeConfigProto* empty_type = invalid_schema.add_types(); - empty_type->set_schema_type(""); - - // Make sure we can't set invalid schemas - SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema); - EXPECT_THAT(set_schema_result.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); - - // Can add an document of a set schema - set_schema_result = icing.SetSchema(schema_with_message); - EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); - EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); - - // Schema with Email doesn't have Message, so would result incompatible - // data - set_schema_result = icing.SetSchema(schema_with_email); - EXPECT_THAT(set_schema_result.status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); - - // Can expand the set of schema types and add an document of a new - // schema type - set_schema_result = icing.SetSchema(schema_with_email_and_message); - EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK)); - EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000)); - - EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); - // Can't add an document whose schema isn't set - auto photo_document = DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Photo") - .AddStringProperty("creator", "icing") - .Build(); - PutResultProto put_result_proto = icing.Put(photo_document); - EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); - EXPECT_THAT(put_result_proto.status().message(), - HasSubstr("'Photo' not found")); -} + // It's intentional that no 
PersistToDisk call is made before initializing a + // second instance of icing. -TEST_F(IcingSearchEngineTest, - SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::PARTIAL_LOSS)); + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::NONE)); - SchemaProto schema_with_no_indexed_property = CreateMessageSchema(); - schema_with_no_indexed_property.mutable_types(0) - ->mutable_properties(0) - ->clear_string_indexing_config(); - - SetSchemaResultProto set_schema_result = - icing.SetSchema(schema_with_no_indexed_property); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Message"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // Nothing will be index and Search() won't return anything. - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); + // The document shouldn't be found because we forgot to call + // PersistToDisk(LITE)! 
+ EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::NOT_FOUND)); + // Searching also shouldn't get us anything because the index wasn't + // recovered. SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto empty_result; - empty_result.mutable_status()->set_code(StatusProto::OK); - - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(empty_result)); - - SchemaProto schema_with_indexed_property = CreateMessageSchema(); - // Index restoration should be triggered here because new schema requires more - // properties to be indexed. - set_schema_result = icing.SetSchema(schema_with_indexed_property); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Message"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. 
SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SchemaTypeConfigProto person_proto = - SchemaTypeConfigBuilder() - .SetType("Person") - .AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .Build(); - SchemaProto nested_schema = - SchemaBuilder() - .AddType(person_proto) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - expected_set_schema_result.mutable_new_schema_types()->Add("Person"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - DocumentProto document = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(1000) - .AddStringProperty("subject", - "Did you get the memo about TPS reports?") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Bill Lundbergh") - .Build()) - .Build(); - - // "sender.name" should get assigned property id 0 and subject should get - // property id 1. - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // document should match a query for 'Bill' in 'sender.name', but not in - // 'subject' - SearchSpecProto search_spec; - search_spec.set_query("sender.name:Bill"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto result; - result.mutable_status()->set_code(StatusProto::OK); - *result.mutable_results()->Add()->mutable_document() = document; SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); - - SearchResultProto empty_result; - empty_result.mutable_status()->set_code(StatusProto::OK); - search_spec.set_query("subject:Bill"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(empty_result)); - - // Now update the schema with index_nested_properties=false. 
This should - // reassign property ids, lead to an index rebuild and ensure that nothing - // match a query for "Bill". - SchemaProto no_nested_schema = - SchemaBuilder() - .AddType(person_proto) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", - /*index_nested_properties=*/false) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - set_schema_result = icing.SetSchema(no_nested_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // document shouldn't match a query for 'Bill' in either 'sender.name' or - // 'subject' - search_spec.set_query("sender.name:Bill"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(empty_result)); - - search_spec.set_query("subject:Bill"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(empty_result)); + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -TEST_F(IcingSearchEngineTest, - ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), 
GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // 'body' should have a property id of 0 and 'subject' should have a property - // id of 1. - SchemaProto email_with_body_schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - SetSchemaResultProto set_schema_result = - icing.SetSchema(email_with_body_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // Create a document with only a subject property. - DocumentProto document = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(1000) - .AddStringProperty("subject", - "Did you get the memo about TPS reports?") - .Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // We should be able to retrieve the document by searching for 'tps' in - // 'subject'. 
- SearchSpecProto search_spec; - search_spec.set_query("subject:tps"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto result; - result.mutable_status()->set_code(StatusProto::OK); - *result.mutable_results()->Add()->mutable_document() = document; +TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) { + DocumentProto document = CreateMessageDocument("namespace", "uri"); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); - - // Now update the schema to remove the 'body' field. This is backwards - // incompatible, but document should be preserved because it doesn't contain a - // 'body' field. If the index is correctly rebuilt, then 'subject' will now - // have a property id of 0. If not, then the hits in the index will still have - // have a property id of 1 and therefore it won't be found. - SchemaProto email_no_body_schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty( - PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); + IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); + EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); + EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk()); + EXPECT_THAT( + icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document)); - set_schema_result = icing.SetSchema( - email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); - expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // We should be able to retrieve the document by searching for 'tps' in - // 'subject'. - search_spec.set_query("subject:tps"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); -} + IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); + InitializeResultProto init_result = icing2.Initialize(); + EXPECT_THAT(init_result.status(), ProtoIsOk()); + EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), + Eq(InitializeStatsProto::NO_DATA_LOSS)); + EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), + Eq(InitializeStatsProto::NONE)); -TEST_F( - IcingSearchEngineTest, - ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + // A checksum mismatch gets reported as an IO error. The document store and + // index didn't have their derived files included in the checksum previously, + // so reinitializing will trigger a checksum mismatch. + EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::IO_ERROR)); - // 'body' should have a property id of 0 and 'subject' should have a property - // id of 1. 
- SchemaProto email_with_body_schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); + // Schema is still intact. + GetSchemaResultProto expected_get_schema_result_proto; + expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - SetSchemaResultProto set_schema_result = - icing.SetSchema(email_with_body_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + EXPECT_THAT(icing2.GetSchema(), + EqualsProto(expected_get_schema_result_proto)); - // Create a document with only a subject property. - DocumentProto document = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(1000) - .AddStringProperty("subject", - "Did you get the memo about TPS reports?") - .Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + // The document should be found because we called PersistToDisk(LITE)! + EXPECT_THAT( + icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) + .document(), + EqualsProto(document)); - // We should be able to retrieve the document by searching for 'tps' in - // 'subject'. + // Recovered index is still intact. 
SearchSpecProto search_spec; - search_spec.set_query("subject:tps"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("message"); // Content in the Message document. - SearchResultProto result; - result.mutable_status()->set_code(StatusProto::OK); - *result.mutable_results()->Add()->mutable_document() = document; + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + document; SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); - - // Now update the schema to remove the 'body' field. This is backwards - // incompatible, but document should be preserved because it doesn't contain a - // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to' - // will now have property ids of 0 and 1 respectively. If not, then the hits - // in the index will still have have a property id of 1 and therefore it won't - // be found. - SchemaProto email_no_body_schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("to") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - set_schema_result = icing.SetSchema( - email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); - expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // We should be able to retrieve the document by searching for 'tps' in - // 'subject'. - search_spec.set_query("subject:tps"); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); -} - -TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SchemaTypeConfigProto email_schema_type = - SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty( - PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument("Person", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .Build(); - SchemaProto nested_schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Person") - .AddProperty(PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("company") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(email_schema_type) - .Build(); - - SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_new_schema_types()->Add("Email"); - expected_set_schema_result.mutable_new_schema_types()->Add("Person"); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // Create two documents - a person document and an email document - both docs - // should be deleted when we remove the 'company' field from the person type. - DocumentProto person_document = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Person") - .SetCreationTimestampMs(1000) - .AddStringProperty("name", "Bill Lundbergh") - .AddStringProperty("company", "Initech Corp.") - .Build(); - EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk()); - - DocumentProto email_document = - DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(1000) - .AddStringProperty("subject", - "Did you get the memo about TPS reports?") - .AddDocumentProperty("sender", person_document) - .Build(); - EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); - - // We should be able to retrieve both documents. - GetResultProto get_result = - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); - EXPECT_THAT(get_result.status(), ProtoIsOk()); - EXPECT_THAT(get_result.document(), EqualsProto(person_document)); - - get_result = - icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); - EXPECT_THAT(get_result.status(), ProtoIsOk()); - EXPECT_THAT(get_result.document(), EqualsProto(email_document)); - - // Now update the schema to remove the 'company' field. This is backwards - // incompatible, *both* documents should be deleted because both fail - // validation (they each contain a 'Person' that has a non-existent property). 
- nested_schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(email_schema_type) - .Build(); - - set_schema_result = icing.SetSchema( - nested_schema, /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); - expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person"); - expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Email"); - expected_set_schema_result.mutable_index_incompatible_changed_schema_types() - ->Add("Person"); - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - // Both documents should be deleted now. - get_result = - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); - EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); - - get_result = - icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); - EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); + icing2.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); } -// TODO(b/256022027): add unit tests for join incompatible schema change to make -// sure the joinable cache is rebuilt correctly. 
- -TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SchemaProto schema_with_optional_subject; - auto type = schema_with_optional_subject.add_types(); - type->set_schema_type("email"); - - // Add a OPTIONAL property - auto property = type->add_properties(); - property->set_property_name("subject"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(), - ProtoIsOk()); - - DocumentProto email_document_without_subject = - DocumentBuilder() - .SetKey("namespace", "without_subject") - .SetSchema("email") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto email_document_with_subject = - DocumentBuilder() - .SetKey("namespace", "with_subject") - .SetSchema("email") - .AddStringProperty("subject", "foo") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk()); - - SchemaProto schema_with_required_subject; - type = schema_with_required_subject.add_types(); - type->set_schema_type("email"); - - // Add a REQUIRED property - property = type->add_properties(); - property->set_property_name("subject"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - - // Can't set the schema since it's incompatible - SetSchemaResultProto set_schema_result = - icing.SetSchema(schema_with_required_subject); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result_proto; - expected_set_schema_result_proto.mutable_status()->set_code( - StatusProto::FAILED_PRECONDITION); - expected_set_schema_result_proto.mutable_status()->set_message( - "Schema is incompatible."); - expected_set_schema_result_proto.add_incompatible_schema_types("email"); - - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); - - // Force set it - set_schema_result = - icing.SetSchema(schema_with_required_subject, - /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result_proto.mutable_status()->clear_message(); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto)); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = email_document_with_subject; - - EXPECT_THAT(icing.Get("namespace", "with_subject", - GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // The document without a subject got deleted because it failed validation - // against the new schema - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, without_subject) not found."); - expected_get_result_proto.clear_document(); - - EXPECT_THAT(icing.Get("namespace", "without_subject", - GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SchemaProto schema; - auto type = 
schema.add_types(); - type->set_schema_type("email"); - type = schema.add_types(); - type->set_schema_type("message"); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto email_document = - DocumentBuilder() - .SetKey("namespace", "email_uri") - .SetSchema("email") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto message_document = - DocumentBuilder() - .SetKey("namespace", "message_uri") - .SetSchema("message") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk()); - - // Clear the schema and only add the "email" type, essentially deleting the - // "message" type - SchemaProto new_schema; - type = new_schema.add_types(); - type->set_schema_type("email"); - - // Can't set the schema since it's incompatible - SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_result; - expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); - expected_result.mutable_status()->set_message("Schema is incompatible."); - expected_result.add_deleted_schema_types("message"); - - EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); - - // Force set it - set_schema_result = - icing.SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true); - // Ignore latency numbers. They're covered elsewhere. 
- set_schema_result.clear_latency_ms(); - expected_result.mutable_status()->set_code(StatusProto::OK); - expected_result.mutable_status()->clear_message(); - EXPECT_THAT(set_schema_result, EqualsProto(expected_result)); - - // "email" document is still there - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = email_document; - - EXPECT_THAT(icing.Get("namespace", "email_uri", - GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // "message" document got deleted - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, message_uri) not found."); - expected_get_result_proto.clear_document(); - - EXPECT_THAT(icing.Get("namespace", "message_uri", - GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetSchemaNotFound) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - EXPECT_THAT(icing.GetSchema().status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); -} - -TEST_F(IcingSearchEngineTest, GetSchemaOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetSchemaTypeFailedPrecondition) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - 
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - GetSchemaTypeResultProto get_schema_type_result_proto = - icing.GetSchemaType("nonexistent_schema"); - EXPECT_THAT(get_schema_type_result_proto.status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(get_schema_type_result_proto.status().message(), - HasSubstr("Schema not set")); -} - -TEST_F(IcingSearchEngineTest, GetSchemaTypeOk) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - GetSchemaTypeResultProto expected_get_schema_type_result_proto; - expected_get_schema_type_result_proto.mutable_status()->set_code( - StatusProto::OK); - *expected_get_schema_type_result_proto.mutable_schema_type_config() = - CreateMessageSchema().types(0); - EXPECT_THAT(icing.GetSchemaType(CreateMessageSchema().types(0).schema_type()), - EqualsProto(expected_get_schema_type_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetDocument) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Simple put and get - ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - ASSERT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Put an invalid document - PutResultProto put_result_proto = icing.Put(DocumentProto()); - EXPECT_THAT(put_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); - EXPECT_THAT(put_result_proto.status().message(), - 
HasSubstr("'namespace' is empty")); - - // Get a non-existing key - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (wrong, uri) not found."); - expected_get_result_proto.clear_document(); - ASSERT_THAT(icing.Get("wrong", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetDocumentProjectionEmpty) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - GetResultSpecProto result_spec; - TypePropertyMask* mask = result_spec.add_type_property_masks(); - mask->set_schema_type(document.schema()); - mask->add_paths(""); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - expected_get_result_proto.mutable_document()->clear_properties(); - ASSERT_THAT(icing.Get("namespace", "uri", result_spec), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetDocumentWildCardProjectionEmpty) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - GetResultSpecProto result_spec; - TypePropertyMask* mask = result_spec.add_type_property_masks(); - mask->set_schema_type("*"); - mask->add_paths(""); - - GetResultProto expected_get_result_proto; - 
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - expected_get_result_proto.mutable_document()->clear_properties(); - ASSERT_THAT(icing.Get("namespace", "uri", result_spec), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleFieldPaths) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // 1. Add an email document - DocumentProto document = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("subject", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - GetResultSpecProto result_spec; - TypePropertyMask* mask = result_spec.add_type_property_masks(); - mask->set_schema_type("Email"); - mask->add_paths("sender.name"); - mask->add_paths("subject"); - - // 2. Verify that the returned result only contains the 'sender.name' - // property and the 'subject' property. 
- GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .Build()) - .AddStringProperty("subject", "Hello World!") - .Build(); - ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, GetDocumentWildcardProjectionMultipleFieldPaths) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // 1. Add an email document - DocumentProto document = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("subject", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - GetResultSpecProto result_spec; - TypePropertyMask* mask = result_spec.add_type_property_masks(); - mask->set_schema_type("*"); - mask->add_paths("sender.name"); - mask->add_paths("subject"); - - // 2. Verify that the returned result only contains the 'sender.name' - // property and the 'subject' property. 
- GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .Build()) - .AddStringProperty("subject", "Hello World!") - .Build(); - ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - GetDocumentSpecificProjectionOverridesWildcardProjection) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // 1. Add an email document - DocumentProto document = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("subject", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // 2. Add type property masks for the wildcard and the specific type of the - // document 'Email'. The wildcard should be ignored and only the 'Email' - // projection should apply. - GetResultSpecProto result_spec; - TypePropertyMask* mask = result_spec.add_type_property_masks(); - mask->set_schema_type("*"); - mask->add_paths("subject"); - mask = result_spec.add_type_property_masks(); - mask->set_schema_type("Email"); - mask->add_paths("body"); - - // 3. 
Verify that the returned result only contains the 'body' property. - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); - result_spec.mutable_snippet_spec()->set_num_to_snippet(1); - - SearchResultProto results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(2)); - - const DocumentProto& document = results.results(0).document(); - EXPECT_THAT(document, EqualsProto(document_two)); - - const SnippetProto& snippet = results.results(0).snippet(); - EXPECT_THAT(snippet.entries(), SizeIs(1)); - EXPECT_THAT(snippet.entries(0).property_name(), Eq("body")); - std::string_view content = - GetString(&document, 
snippet.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, snippet.entries(0)), - ElementsAre("message body")); - EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message")); - - EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); - EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty()); - - search_spec.set_query("foo"); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchReturnsScoresDocumentScore) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); - document_one.set_score(93); - document_one.set_creation_timestamp_ms(10000); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); - document_two.set_score(15); - document_two.set_creation_timestamp_ms(12000); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - // Rank by DOCUMENT_SCORE and ensure that the score field is populated with - // document score. 
- ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - SearchResultProto results = icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(2)); - - EXPECT_THAT(results.results(0).document(), EqualsProto(document_one)); - EXPECT_THAT(results.results(0).score(), 93); - EXPECT_THAT(results.results(1).document(), EqualsProto(document_two)); - EXPECT_THAT(results.results(1).score(), 15); -} - -TEST_F(IcingSearchEngineTest, SearchReturnsScoresCreationTimestamp) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); - document_one.set_score(93); - document_one.set_creation_timestamp_ms(10000); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); - document_two.set_score(15); - document_two.set_creation_timestamp_ms(12000); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - // Rank by CREATION_TS and ensure that the score field is populated with - // creation ts. 
- ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - - SearchResultProto results = icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(2)); - - EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); - EXPECT_THAT(results.results(0).score(), 12000); - EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); - EXPECT_THAT(results.results(1).score(), 10000); -} - -TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(1000); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = CreateMessageDocument("namespace", "uri1"); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = CreateMessageDocument("namespace", "uri2"); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(1); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document_two; - - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - - EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000)); - 
EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(), - Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(), - Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(), - Eq(1000)); - - // The token is a random number so we don't verify it. - expected_search_result_proto.set_next_page_token( - search_result_proto.next_page_token()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query(""); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(0); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query(""); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(-5); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code( - StatusProto::INVALID_ARGUMENT); - 
expected_search_result_proto.mutable_status()->set_message( - "ResultSpecProto.num_per_page cannot be negative."); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchNonPositivePageTotalBytesLimitReturnsInvalidArgument) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query(""); - - ResultSpecProto result_spec; - result_spec.set_num_total_bytes_per_page_threshold(-1); - - SearchResultProto actual_results1 = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(actual_results1.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); - - result_spec.set_num_total_bytes_per_page_threshold(0); - SearchResultProto actual_results2 = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(actual_results2.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) { - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); - - { - // Set the schema up beforehand. - IcingSearchEngine icing(icing_options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - // Schema will be persisted to disk when icing goes out of scope. - } - - { - // Ensure that icing initializes the schema and section_manager - // properly from the pre-existing file. 
- IcingSearchEngine icing(icing_options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - // The index and document store will be persisted to disk when icing goes - // out of scope. - } - - { - // Ensure that the index is brought back up without problems and we - // can query for the content that we expect. - IcingSearchEngine icing(icing_options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - search_spec.set_query("foo"); - - SearchResultProto empty_result; - empty_result.mutable_status()->set_code(StatusProto::OK); - actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(empty_result)); - } -} - -TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(1000); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - 
search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - // Empty result, no next-page token - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - - EXPECT_THAT(search_result_proto.query_stats().latency_ms(), Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().parse_query_latency_ms(), - Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().scoring_latency_ms(), Eq(1000)); - EXPECT_THAT(search_result_proto.query_stats().ranking_latency_ms(), Eq(0)); - EXPECT_THAT(search_result_proto.query_stats().document_retrieval_latency_ms(), - Eq(0)); - EXPECT_THAT(search_result_proto.query_stats().lock_acquisition_latency_ms(), - Eq(1000)); - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates and inserts 5 documents - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); - DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); - DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - 
ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(2); - - // Searches and gets the first page, 2 results - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document5; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document4; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); - uint64_t next_page_token = search_result_proto.next_page_token(); - // Since the token is a random number, we don't need to verify - expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // Second page, 2 results - expected_search_result_proto.clear_results(); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // Third page, 1 result - expected_search_result_proto.clear_results(); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - // Because there are no more results, we should not return the next page - // token. 
- expected_search_result_proto.clear_next_page_token(); - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // No more results - expected_search_result_proto.clear_results(); - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates and inserts 5 documents - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); - DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); - DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(2); - - // Searches and gets the first page, 2 results - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - 
*expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document5; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document4; - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); - uint64_t next_page_token = search_result_proto.next_page_token(); - // Since the token is a random number, we don't need to verify - expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // Second page, 2 results - expected_search_result_proto.clear_results(); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // Third page, 1 result - expected_search_result_proto.clear_results(); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - // Because there are no more results, we should not return the next page - // token. 
- expected_search_result_proto.clear_next_page_token(); - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // No more results - expected_search_result_proto.clear_results(); - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchWithUnknownEnabledFeatureShouldReturnError) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - search_spec.add_enabled_features("BAD_FEATURE"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates and inserts 5 documents - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); - DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); - DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - 
ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(2); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); - result_spec.mutable_snippet_spec()->set_num_to_snippet(3); - - // Searches and gets the first page, 2 results with 2 snippets - SearchResultProto search_result = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - ASSERT_THAT(search_result.status(), ProtoIsOk()); - ASSERT_THAT(search_result.results(), SizeIs(2)); - ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); - - const DocumentProto& document_result_1 = search_result.results(0).document(); - EXPECT_THAT(document_result_1, EqualsProto(document5)); - const SnippetProto& snippet_result_1 = search_result.results(0).snippet(); - EXPECT_THAT(snippet_result_1.entries(), SizeIs(1)); - EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body")); - std::string_view content = GetString( - &document_result_1, snippet_result_1.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)), - ElementsAre("message body")); - EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)), - ElementsAre("message")); - - const DocumentProto& document_result_2 = search_result.results(1).document(); - EXPECT_THAT(document_result_2, EqualsProto(document4)); - const SnippetProto& snippet_result_2 = search_result.results(1).snippet(); - EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body")); - content = GetString(&document_result_2, - snippet_result_2.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, 
snippet_result_2.entries(0)), - ElementsAre("message body")); - EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)), - ElementsAre("message")); - - // Second page, 2 result with 1 snippet - search_result = icing.GetNextPage(search_result.next_page_token()); - ASSERT_THAT(search_result.status(), ProtoIsOk()); - ASSERT_THAT(search_result.results(), SizeIs(2)); - ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); - - const DocumentProto& document_result_3 = search_result.results(0).document(); - EXPECT_THAT(document_result_3, EqualsProto(document3)); - const SnippetProto& snippet_result_3 = search_result.results(0).snippet(); - EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body")); - content = GetString(&document_result_3, - snippet_result_3.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)), - ElementsAre("message body")); - EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)), - ElementsAre("message")); - - EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2)); - EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty()); - - // Third page, 1 result with 0 snippets - search_result = icing.GetNextPage(search_result.next_page_token()); - ASSERT_THAT(search_result.status(), ProtoIsOk()); - ASSERT_THAT(search_result.results(), SizeIs(1)); - ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); - - EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1)); - EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty()); -} - -TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = 
CreateMessageDocument("namespace", "uri2"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(1); - - // Searches and gets the first page, 1 result - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); - uint64_t next_page_token = search_result_proto.next_page_token(); - // Since the token is a random number, we don't need to verify - expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - // Now document1 is still to be fetched. 
- - // Invalidates token - icing.InvalidateNextPageToken(next_page_token); - - // Tries to fetch the second page, no result since it's invalidated - expected_search_result_proto.clear_results(); - expected_search_result_proto.clear_next_page_token(); - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - AllPageTokensShouldBeInvalidatedAfterOptimization) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(1); - - // Searches and gets the first page, 1 result - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(search_result_proto.next_page_token(), Gt(kInvalidNextPageToken)); - uint64_t next_page_token = search_result_proto.next_page_token(); - // Since the token is a random number, we don't need to verify - expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - // Now document1 is still to be fetched. 
- - OptimizeResultProto optimize_result_proto; - optimize_result_proto.mutable_status()->set_code(StatusProto::OK); - optimize_result_proto.mutable_status()->set_message(""); - OptimizeResultProto actual_result = icing.Optimize(); - actual_result.clear_optimize_stats(); - ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto)); - - // Tries to fetch the second page, no results since all tokens have been - // invalidated during Optimize() - expected_search_result_proto.clear_results(); - expected_search_result_proto.clear_next_page_token(); - search_result_proto = icing.GetNextPage(next_page_token); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) { - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); - - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri1) not found."); - { - IcingSearchEngine icing(icing_options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - // Deletes document1 - ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); - const std::string document_log_path = - icing_options.base_dir() + "/document_dir/" + - DocumentLogCreator::GetDocumentLogFilename(); - int64_t document_log_size_before = - filesystem()->GetFileSize(document_log_path.c_str()); - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - int64_t document_log_size_after = - filesystem()->GetFileSize(document_log_path.c_str()); - - // Validates that document can't be found right after Optimize() - EXPECT_THAT( - 
icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - // Validates that document is actually removed from document log - EXPECT_THAT(document_log_size_after, Lt(document_log_size_before)); - } // Destroys IcingSearchEngine to make sure nothing is cached. - - IcingSearchEngine icing(icing_options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) { - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); - IcingSearchEngine icing(icing_options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Create a tmp dir that will be used in Optimize() to swap files, - // this validates that any tmp dirs will be deleted before using. 
- const std::string tmp_dir = - icing_options.base_dir() + "/document_dir_optimize_tmp"; - - const std::string tmp_file = tmp_dir + "/file"; - ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str())); - ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str())); - ASSERT_TRUE(fd.is_valid()); - ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); - fd.reset(); - - EXPECT_THAT(icing.Optimize().status(), ProtoIsOk()); - - EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str())); - EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str())); -} - -TEST_F(IcingSearchEngineTest, GetOptimizeInfoHasCorrectStats) { - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(500) - .Build(); - - { - auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(1000); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Just initialized, nothing is optimizable yet. - GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); - EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); - - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - // Only have active documents, nothing is optimizable yet. 
- optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); - EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); - - // Deletes document1 - ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); - - optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0)); - EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); - int64_t first_estimated_optimizable_bytes = - optimize_info.estimated_optimizable_bytes(); - - // Add a second document, but it'll be expired since the time (1000) is - // greater than the document's creation timestamp (100) + the document's ttl - // (500) - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), - Gt(first_estimated_optimizable_bytes)); - EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0)); - - // Optimize - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - } - - { - // Recreate with new time - auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(5000); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Nothing is optimizable now that everything has been optimized away. 
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo(); - EXPECT_THAT(optimize_info.status(), ProtoIsOk()); - EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0)); - EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0)); - EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000)); - } -} - -TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) { - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); - DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); - DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - - // Validates that Get() and Put() are good right after Optimize() - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - *expected_get_result_proto.mutable_document() = document3; - EXPECT_THAT( - icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - 
EXPECT_THAT(icing.Put(document4).status(), ProtoIsOk()); - } // Destroys IcingSearchEngine to make sure nothing is cached. - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - *expected_get_result_proto.mutable_document() = document3; - EXPECT_THAT( - icing.Get("namespace", "uri3", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - *expected_get_result_proto.mutable_document() = document4; - EXPECT_THAT( - icing.Get("namespace", "uri4", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, - GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) { - DocumentProto empty_document1 = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto empty_document2 = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto empty_document3 = - DocumentBuilder() - .SetKey("namespace", "uri3") - .SetSchema("Message") - .AddStringProperty("body", "") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), 
ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - - // Validates that Get() and Put() are good right after Optimize() - *expected_get_result_proto.mutable_document() = empty_document1; - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) { - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - - // Validates that Delete() works right after Optimize() - EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code( - StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri1) not found."); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - 
expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - } // Destroys IcingSearchEngine to make sure nothing is cached. - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri1) not found."); - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri2) not found."); - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); -} - -TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) { - // Setup filesystem to fail - auto mock_filesystem = std::make_unique(); - bool just_swapped_files = false; - auto create_dir_lambda = [this, &just_swapped_files](const char* dir_name) { - if (just_swapped_files) { - // We should fail the first call immediately after swapping files. 
- just_swapped_files = false; - return false; - } - return filesystem()->CreateDirectoryRecursively(dir_name); - }; - ON_CALL(*mock_filesystem, CreateDirectoryRecursively) - .WillByDefault(create_dir_lambda); - - auto swap_lambda = [&just_swapped_files](const char* first_dir, - const char* second_dir) { - just_swapped_files = true; - return false; - }; - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), - HasSubstr("document_dir"))) - .WillByDefault(swap_lambda); - TestIcingSearchEngine icing(options, std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // The mocks should cause an unrecoverable error during Optimize - returning - // INTERNAL. - ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL)); - - // Ordinary operations should fail safely. - SchemaProto simple_schema; - auto type = simple_schema.add_types(); - type->set_schema_type("type0"); - auto property = type->add_properties(); - property->set_property_name("prop0"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - DocumentProto simple_doc = DocumentBuilder() - .SetKey("namespace0", "uri0") - .SetSchema("type0") - .AddStringProperty("prop0", "foo") - .Build(); - - SearchSpecProto search_spec; - search_spec.set_query("foo"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - ResultSpecProto result_spec; - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - - EXPECT_THAT(icing.SetSchema(simple_schema).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.Put(simple_doc).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing - .Get(simple_doc.namespace_(), simple_doc.uri(), 
- GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - - // Reset should get icing back to a safe (empty) and working state. - EXPECT_THAT(icing.Reset().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk()); - EXPECT_THAT(icing - .Get(simple_doc.namespace_(), simple_doc.uri(), - GetResultSpecProto::default_instance()) - .status(), - ProtoIsOk()); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), - ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, DeleteBySchemaType) { - SchemaProto schema; - // Add an email type - auto type = schema.add_types(); - type->set_schema_type("email"); - auto property = type->add_properties(); - property->set_property_name("subject"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - // Add an message type - type = schema.add_types(); - type->set_schema_type("message"); - property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("message") - .AddStringProperty("body", "message body1") - 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("email") - .AddStringProperty("subject", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(7); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete the first type. The first doc should be irretrievable. The - // second should still be present. 
- DeleteBySchemaTypeResultProto result_proto = - icing.DeleteBySchemaType("message"); - EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteStatsProto exp_stats; - exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE); - exp_stats.set_latency_ms(7); - exp_stats.set_num_documents_deleted(1); - EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace1, uri1) not found."); - expected_get_result_proto.clear_document(); - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Search for "message", only document2 should show up. 
- SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("message"); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) { - SchemaProto schema = CreateMessageSchema(); - // Add an email type - SchemaProto tmp = CreateEmailSchema(); - *schema.add_types() = tmp.types(0); - - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema(schema.types(0).schema_type()) - .AddStringProperty("body", "message body1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema(schema.types(1).schema_type()) - .AddStringProperty("subject", "subject subject2") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - 
*expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete the first type. The first doc should be irretrievable. The - // second should still be present. - SearchSpecProto search_spec; - search_spec.add_schema_type_filters(schema.types(0).schema_type()); - EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk()); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace1, uri1) not found."); - expected_get_result_proto.clear_document(); - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - search_spec = SearchSpecProto::default_instance(); - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, DeleteByNamespace) { - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body1") - 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace3", "uri3") - .SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(7); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document3; - EXPECT_THAT( - icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete namespace1. Document1 and document2 should be irretrievable. - // Document3 should still be present. 
- DeleteByNamespaceResultProto result_proto = - icing.DeleteByNamespace("namespace1"); - EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteStatsProto exp_stats; - exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE); - exp_stats.set_latency_ms(7); - exp_stats.set_num_documents_deleted(2); - EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace1, uri1) not found."); - expected_get_result_proto.clear_document(); - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace1, uri2) not found."); - expected_get_result_proto.clear_document(); - EXPECT_THAT( - icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document3; - EXPECT_THAT( - icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Search for "message", only document3 should show up. 
- SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("message"); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) { - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete the first namespace. The first doc should be irretrievable. 
The - // second should still be present. - SearchSpecProto search_spec; - search_spec.add_namespace_filters("namespace1"); - EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk()); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace1, uri1) not found."); - expected_get_result_proto.clear_document(); - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - search_spec = SearchSpecProto::default_instance(); - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, DeleteByQuery) { - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - 
auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(7); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete all docs containing 'body1'. The first doc should be irretrievable. - // The second should still be present. 
- SearchSpecProto search_spec; - search_spec.set_query("body1"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec); - EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteByQueryStatsProto exp_stats; - exp_stats.set_latency_ms(7); - exp_stats.set_num_documents_deleted(1); - exp_stats.set_query_length(search_spec.query().length()); - exp_stats.set_num_terms(1); - exp_stats.set_num_namespaces_filtered(0); - exp_stats.set_num_schema_types_filtered(0); - exp_stats.set_parse_query_latency_ms(7); - exp_stats.set_document_removal_latency_ms(7); - EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace1, uri1) not found."); - expected_get_result_proto.clear_document(); - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - search_spec = SearchSpecProto::default_instance(); - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - 
expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, DeleteByQueryReturnInfo) { - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace2", "uri3") - .SetSchema("Message") - .AddStringProperty("body", "message body3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(7); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document3; - EXPECT_THAT( - icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - 
// Delete all docs to test the information is correctly grouped. - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - DeleteByQueryResultProto result_proto = - icing.DeleteByQuery(search_spec, true); - EXPECT_THAT(result_proto.status(), ProtoIsOk()); - DeleteByQueryStatsProto exp_stats; - exp_stats.set_latency_ms(7); - exp_stats.set_num_documents_deleted(3); - exp_stats.set_query_length(search_spec.query().length()); - exp_stats.set_num_terms(1); - exp_stats.set_num_namespaces_filtered(0); - exp_stats.set_num_schema_types_filtered(0); - exp_stats.set_parse_query_latency_ms(7); - exp_stats.set_document_removal_latency_ms(7); - EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); - - // Check that DeleteByQuery can return information for deleted documents. - DeleteByQueryResultProto::DocumentGroupInfo info1, info2; - info1.set_namespace_("namespace1"); - info1.set_schema("Message"); - info1.add_uris("uri1"); - info2.set_namespace_("namespace2"); - info2.set_schema("Message"); - info2.add_uris("uri3"); - info2.add_uris("uri2"); - EXPECT_THAT(result_proto.deleted_documents(), - UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2))); - - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); - EXPECT_THAT( - icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()) - .status() - .code(), - Eq(StatusProto::NOT_FOUND)); -} - -TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - 
DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Delete all docs containing 'foo', which should be none of them. Both docs - // should still be present. 
- SearchSpecProto search_spec; - search_spec.set_query("foo"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - expected_get_result_proto.mutable_status()->clear_message(); - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - search_spec = SearchSpecProto::default_instance(); - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) { - // Creates 3 test schemas - SchemaProto schema1 = SchemaProto(CreateMessageSchema()); - - SchemaProto schema2 = SchemaProto(schema1); - auto new_property2 = schema2.mutable_types(0)->add_properties(); - new_property2->set_property_name("property2"); - new_property2->set_data_type(PropertyConfigProto::DataType::STRING); - 
new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - new_property2->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - new_property2->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - SchemaProto schema3 = SchemaProto(schema2); - auto new_property3 = schema3.mutable_types(0)->add_properties(); - new_property3->set_property_name("property3"); - new_property3->set_data_type(PropertyConfigProto::DataType::STRING); - new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - new_property3->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - new_property3->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - - // Validates that SetSchema() works right after Optimize() - EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk()); - } // Destroys IcingSearchEngine to make sure nothing is cached. 
- - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); - - // Validates that Search() works right after Optimize() - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - } // Destroys IcingSearchEngine to make sure nothing is cached. 
- - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) { - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - { - // Initializes a normal icing to create files needed - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - } - - // Creates a mock filesystem in which DeleteDirectoryRecursively() always - // fails. This will fail IcingSearchEngine::OptimizeDocumentStore() and makes - // it return ABORTED_ERROR. - auto mock_filesystem = std::make_unique(); - ON_CALL(*mock_filesystem, - DeleteDirectoryRecursively(HasSubstr("_optimize_tmp"))) - .WillByDefault(Return(false)); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED)); - - // Now optimization is aborted, we verify that document-related functions - // still work as expected. 
- - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - - EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("m"); - search_spec.set_term_match_type(TermMatchType::PREFIX); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - OptimizationShouldRecoverIfFileDirectoriesAreMissing) { - // Creates a mock filesystem in which SwapFiles() always fails and deletes the - // directories. This will fail IcingSearchEngine::OptimizeDocumentStore(). 
- auto mock_filesystem = std::make_unique(); - ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), - HasSubstr("document_dir"))) - .WillByDefault([this](const char* one, const char* two) { - filesystem()->DeleteDirectoryRecursively(one); - filesystem()->DeleteDirectoryRecursively(two); - return false; - }); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - - // Optimize() fails due to filesystem error - OptimizeResultProto result = icing.Optimize(); - EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - // Should rebuild the index for data loss. - EXPECT_THAT(result.optimize_stats().index_restoration_mode(), - Eq(OptimizeStatsProto::FULL_INDEX_REBUILD)); - - // Document is not found because original file directory is missing - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri) not found."); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - DocumentProto new_document = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "new body") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("m"); - search_spec.set_term_match_type(TermMatchType::PREFIX); - - SearchResultProto expected_search_result_proto; - 
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - // Searching old content returns nothing because original file directory is - // missing - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - search_spec.set_query("n"); - - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - new_document; - - // Searching new content returns the new document - search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { - // Creates a mock filesystem in which SwapFiles() always fails and empties the - // directories. This will fail IcingSearchEngine::OptimizeDocumentStore(). 
- auto mock_filesystem = std::make_unique(); - ON_CALL(*mock_filesystem, SwapFiles(HasSubstr("document_dir_optimize_tmp"), - HasSubstr("document_dir"))) - .WillByDefault([this](const char* one, const char* two) { - filesystem()->DeleteDirectoryRecursively(one); - filesystem()->CreateDirectoryRecursively(one); - filesystem()->DeleteDirectoryRecursively(two); - filesystem()->CreateDirectoryRecursively(two); - return false; - }); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - - // Optimize() fails due to filesystem error - OptimizeResultProto result = icing.Optimize(); - EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - // Should rebuild the index for data loss. 
- EXPECT_THAT(result.optimize_stats().index_restoration_mode(), - Eq(OptimizeStatsProto::FULL_INDEX_REBUILD)); - - // Document is not found because original files are missing - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri) not found."); - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - DocumentProto new_document = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "new body") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("m"); - search_spec.set_term_match_type(TermMatchType::PREFIX); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - // Searching old content returns nothing because original files are missing - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - search_spec.set_query("n"); - - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - new_document; - - // Searching new content returns the new document - search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) { - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto body = type->add_properties(); - 
body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(500) - .Build(); - - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - // Time just has to be less than the document's creation timestamp (100) + the - // document's ttl (500) - auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(400); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Check that the document is returned as part of search results - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) { - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto body = type->add_properties(); - body->set_property_name("body"); - 
body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(500) - .Build(); - - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - // Time just has to be greater than the document's creation timestamp (100) + - // the document's ttl (500) - auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(700); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Check that the document is not returned as part of search results - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { - SchemaProto schema; - auto type_config = schema.add_types(); - type_config->set_schema_type("message"); - - auto property = type_config->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - 
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - DocumentProto message_document = - DocumentBuilder() - .SetKey("namespace", "message_uri") - .SetSchema("message") - .AddStringProperty("body", "foo") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk()); - - // Make sure we can search for message document - SearchSpecProto search_spec; - search_spec.set_query("foo"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - // The message isn't indexed, so we get nothing - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // With just the schema type filter, we can search for the message - search_spec.Clear(); - search_spec.add_schema_type_filters("message"); - - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - message_document; - - search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // Since SchemaTypeIds are assigned based on order in the SchemaProto, this - // will force a change in the DocumentStore's cached SchemaTypeIds - schema.clear_types(); - type_config = schema.add_types(); - type_config->set_schema_type("email"); - - // Adding a new indexed property will require reindexing - type_config = schema.add_types(); - 
type_config->set_schema_type("message"); - - property = type_config->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - search_spec.Clear(); - search_spec.set_query("foo"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.add_schema_type_filters("message"); - - // We can still search for the message document - search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - - { - // Basic initialization/setup - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - EXPECT_THAT( - 
icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str())); - - // We should be able to recover from this and access all our previous data - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Checks that DocumentLog is still ok - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Checks that the index is still ok so we can search over it - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - - // Checks that Schema is still since it'll be needed to validate the document - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) { - { - // Basic initialization/setup - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - 
- EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - const std::string schema_file = - absl_ports::StrCat(GetSchemaDir(), "/schema.pb"); - const std::string corrupt_data = "1234"; - EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(), - corrupt_data.size())); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INTERNAL)); -} - -TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) { - { - // Basic initialization/setup - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = - CreateMessageDocument("namespace", "uri"); - - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - const std::string document_log_file = absl_ports::StrCat( - GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); - const std::string corrupt_data = "1234"; - EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(), - corrupt_data.data(), corrupt_data.size())); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INTERNAL)); -} - -TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { - DocumentProto document1 = 
CreateMessageDocument("namespace", "uri1"); - DocumentProto document2_with_additional_property = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("additional", "content") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - { - // Initializes folder and schema - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - property = type->add_properties(); - property->set_property_name("additional"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document2_with_additional_property).status(), - ProtoIsOk()); - - // Won't get us anything because "additional" isn't marked as an indexed - // property in the schema - SearchSpecProto search_spec; - search_spec.set_query("additional:content"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - 
ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - { - // This schema will change the SchemaTypeIds from the previous schema_ - // (since SchemaTypeIds are assigned based on order of the types, and this - // new schema changes the ordering of previous types) - SchemaProto new_schema; - auto type = new_schema.add_types(); - type->set_schema_type("Email"); - - type = new_schema.add_types(); - type->set_schema_type("Message"); - - // Adding a new property changes the SectionIds (since SectionIds are - // assigned based on alphabetical order of indexed sections, marking - // "additional" as an indexed property will push the "body" property to a - // different SectionId) - auto property = type->add_properties(); - property->set_property_name("body"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - property = type->add_properties(); - property->set_property_name("additional"); - property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - property->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - property->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - // Write the marker file - std::string marker_filepath = - absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); - ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); - ASSERT_TRUE(sfd.is_valid()); - - // Write the new schema - FakeClock fake_clock; - 
ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr schema_store, - SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); - ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); - } // Will persist new schema - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // We can insert a Email document since we kept the new schema - DocumentProto email_document = - DocumentBuilder() - .SetKey("namespace", "email_uri") - .SetSchema("Email") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = email_document; - - EXPECT_THAT(icing.Get("namespace", "email_uri", - GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - SearchSpecProto search_spec; - - // The section restrict will ensure we are using the correct, updated - // SectionId in the Index - search_spec.set_query("additional:content"); - - // Schema type filter will ensure we're using the correct, updated - // SchemaTypeId in the DocumentStore - search_spec.add_schema_type_filters("Message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2_with_additional_property; - - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) { - DocumentProto document1 = 
CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - - { - // Initializes folder and schema, index one document - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - } // This should shut down IcingSearchEngine and persist anything it needs to - - { - FakeClock fake_clock; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr schema_store, - SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); - ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema())); - - // Puts a second document into DocumentStore but doesn't index it. - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock, - schema_store.get())); - std::unique_ptr document_store = - std::move(create_result.document_store); - - ICING_EXPECT_OK(document_store->Put(document2)); - } - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - // Index Restoration should be triggered here and document2 should be - // indexed. 
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document1; - - // DocumentStore kept the additional document - EXPECT_THAT( - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - *expected_get_result_proto.mutable_document() = document2; - EXPECT_THAT( - icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // We indexed the additional document - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - - { - // Initializes folder and schema, index one document - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - 
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - // Pretend we lost the entire index - EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively( - absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str())); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Check that our index is ok by searching over the restored index - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - - { - // Initializes folder and schema, index one document - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), - ProtoIsOk()); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - 
ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - // Pretend index is corrupted - const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb"; - ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str())); - ASSERT_TRUE(fd.is_valid()); - ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Check that our index is ok by searching over the restored index - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 documents and ensures the relationship in terms of document - // score is: document1 < document2 < document3 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(3) - 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - // Intentionally inserts the documents in the order that is different than - // their score order - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // Result should be in descending score order - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 documents and ensures the relationship of them is: - // document1 < document2 < document3 - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(1571111111111) - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace", "uri/2") - 
.SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(1572222222222) - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(3) - .SetCreationTimestampMs(1573333333333) - .Build(); - - // Intentionally inserts the documents in the order that is different than - // their score order - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - - // Results should not be ranked by score but returned in reverse insertion - // order. 
- ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 documents and ensures the relationship in terms of creation - // timestamp score is: document1 < document2 < document3 - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetCreationTimestampMs(1571111111111) - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetCreationTimestampMs(1572222222222) - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetCreationTimestampMs(1573333333333) - .Build(); - - // Intentionally inserts the documents in the order that is different than - // their score order - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // Result should be in descending timestamp order - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - 
*expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 test documents - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - // Intentionally inserts the documents in a different order to eliminate the - // possibility that the following results are sorted in the default reverse - // insertion order. 
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 > - // doc1 when ranked by USAGE_TYPE1_COUNT. - UsageReport usage_report_doc3 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0, - UsageReport::USAGE_TYPE1); - UsageReport usage_report_doc2 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0, - UsageReport::USAGE_TYPE1); - ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk()); - ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk()); - ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // Result should be in descending USAGE_TYPE1_COUNT order - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultShouldHaveDefaultOrderWithoutUsageCounts) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - 
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 test documents - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // None of the documents have usage reports. Result should be in the default - // reverse insertion order. 
- SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 test documents - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - // Intentionally inserts the documents in a different order to eliminate the - // possibility that the following results are sorted in the default reverse - // insertion order. 
- ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // Report usage for doc2 and doc3. The order will be doc3 > doc2 > doc1 when - // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP. - UsageReport usage_report_doc2 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000, - UsageReport::USAGE_TYPE1); - UsageReport usage_report_doc3 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000, - UsageReport::USAGE_TYPE1); - ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk()); - ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - 
EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace1". - DocumentProto document = CreateEmailDocument( - "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, - "starbucks coffee", - "habit. birthday rewards. 
good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("coffee OR food"); - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - - // Result should be in descending score order - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - // Both doc5 and doc7 have "coffee" in name and text sections. - // However, doc5 has more matches in the text section. - // Documents with "food" are ranked lower as the term "food" is commonly - // present in this corpus, and thus, has a lower IDF. - EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace1/uri5", // 'coffee' 3 times - "namespace1/uri7", // 'coffee' 2 times - "namespace1/uri1", // 'food' 2 times - "namespace1/uri4", // 'food' 2 times - "namespace1/uri2", // 'food' 1 time - "namespace1/uri6")); // 'food' 1 time -} - -TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceAdvanced) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace1". - DocumentProto document = CreateEmailDocument( - "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. 
kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, - "starbucks coffee", - "habit. birthday rewards. good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("coffee OR food"); - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_advanced_scoring_expression("this.relevanceScore() * 2 + 1"); - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::ADVANCED_SCORING_EXPRESSION); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - - // Result should be in descending score order - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - // Both doc5 and doc7 have "coffee" in name and text sections. 
- // However, doc5 has more matches in the text section. - // Documents with "food" are ranked lower as the term "food" is commonly - // present in this corpus, and thus, has a lower IDF. - EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace1/uri5", // 'coffee' 3 times - "namespace1/uri7", // 'coffee' 2 times - "namespace1/uri1", // 'food' 2 times - "namespace1/uri4", // 'food' 2 times - "namespace1/uri2", // 'food' 1 time - "namespace1/uri6")); // 'food' 1 time -} - -TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceNotOperator) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace1". - DocumentProto document = CreateEmailDocument( - "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza", - "thin-crust pizza. good and fast. nice coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. 
expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, - "starbucks coffee", - "habit. birthday rewards. good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("coffee -starbucks"); - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - - // Result should be in descending score order - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace1/uri5", // 'coffee' 3 times, 'starbucks' 0 times - "namespace1/uri3")); // 'coffee' 1 times, 'starbucks' 0 times -} - -TEST_F(IcingSearchEngineTest, - Bm25fRelevanceScoringOneNamespaceSectionRestrict) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace1". - DocumentProto document = CreateEmailDocument( - "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", - "fresh fish. inexpensive. 
good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = - CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18, - "peets coffee, best coffee", - "espresso. decaf. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri7", /*score=*/4, "starbucks", - "habit. birthday rewards. good coffee. 
brewed coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("subject:coffee OR body:food"); - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - - // Result should be in descending score order - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - // The term frequencies of "coffee" and "food" are calculated respectively - // from the subject section and the body section. - // Documents with "food" are ranked lower as the term "food" is commonly - // present in this corpus, and thus, has a lower IDF. - EXPECT_THAT( - GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject - "namespace1/uri1", // 'food' 2 times in section body - "namespace1/uri4", // 'food' 2 times in section body - "namespace1/uri2", // 'food' 1 time in section body - "namespace1/uri6")); // 'food' 1 time in section body -} - -TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringTwoNamespaces) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace1". - DocumentProto document = CreateEmailDocument( - "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. 
kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, - "starbucks coffee", - "habit. birthday rewards. good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace2". - document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10, - "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. 
kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace2", "namespace2/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4, - "starbucks coffee", "good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("coffee OR food"); - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - ResultSpecProto result_spec_proto; - result_spec_proto.set_num_per_page(16); - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec_proto); - - // Result should be in descending score order - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - // The two corpora have the same documents except for document 7, which in - // "namespace2" is much shorter than the average dcoument length, so it is - // boosted. 
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc - "namespace1/uri5", // 'coffee' 3 times - "namespace2/uri5", // 'coffee' 3 times - "namespace1/uri7", // 'coffee' 2 times - "namespace1/uri1", // 'food' 2 times - "namespace2/uri1", // 'food' 2 times - "namespace1/uri4", // 'food' 2 times - "namespace2/uri4", // 'food' 2 times - "namespace1/uri2", // 'food' 1 time - "namespace2/uri2", // 'food' 1 time - "namespace1/uri6", // 'food' 1 time - "namespace2/uri6")); // 'food' 1 time -} - -TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringWithNamespaceFilter) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace1". - DocumentProto document = CreateEmailDocument( - "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. 
expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace1", "namespace1/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4, - "starbucks coffee", - "habit. birthday rewards. good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Create and index documents in namespace "namespace2". - document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10, - "sushi belmont", - "fresh fish. inexpensive. good sushi."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander", - "indian food. buffet. spicy food. kadai chicken."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4, - "panda express", - "chinese food. cheap. inexpensive. kung pao."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23, - "speederia pizza", - "thin-crust pizza. good and fast."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8, - "whole foods", - "salads. pizza. organic food. expensive."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee", - "espresso. decaf. brewed coffee. whole beans. 
excellent coffee."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument( - "namespace2", "namespace2/uri6", /*score=*/4, "costco", - "bulk. cheap whole beans. frozen fish. food samples."); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4, - "starbucks coffee", "good coffee"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("coffee OR food"); - // Now query only corpus 2 - search_spec.add_namespace_filters("namespace2"); - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - search_result_proto = icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()); - - // Result from namespace "namespace2" should be in descending score order - EXPECT_THAT(search_result_proto.status(), ProtoIsOk()); - // Both doc5 and doc7 have "coffee" in name and text sections. - // Even though doc5 has more matches in the text section, doc7's length is - // much shorter than the average corpus's length, so it's being boosted. - // Documents with "food" are ranked lower as the term "food" is commonly - // present in this corpus, and thus, has a lower IDF. 
- EXPECT_THAT(GetUrisFromSearchResults(search_result_proto), - ElementsAre("namespace2/uri7", // 'coffee' 2 times, short doc - "namespace2/uri5", // 'coffee' 3 times - "namespace2/uri1", // 'food' 2 times - "namespace2/uri4", // 'food' 2 times - "namespace2/uri2", // 'food' 1 time - "namespace2/uri6")); // 'food' 1 time -} - -TEST_F(IcingSearchEngineTest, - SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 test documents - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // None of the documents have usage reports. Result should be in the default - // reverse insertion order. 
- SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 test documents - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // Report usage for doc1 and doc2. The older timestamp 5000 shouldn't be - // overridden by 1000. 
The order will be doc1 > doc2 when ranked by - // USAGE_TYPE1_LAST_USED_TIMESTAMP. - UsageReport usage_report_doc1_time1 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/1000, - UsageReport::USAGE_TYPE1); - UsageReport usage_report_doc1_time5 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/5000, - UsageReport::USAGE_TYPE1); - UsageReport usage_report_doc2_time3 = CreateUsageReport( - /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/3000, - UsageReport::USAGE_TYPE1); - ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time5).status(), ProtoIsOk()); - ASSERT_THAT(icing.ReportUsage(usage_report_doc2_time3).status(), ProtoIsOk()); - ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time1).status(), ProtoIsOk()); - - // "m" will match both documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // 
Creates 3 documents and ensures the relationship in terms of document - // score is: document1 < document2 < document3 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(3) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - // Intentionally inserts the documents in the order that is different than - // their score order - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - // "m" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - // Result should be in ascending score order - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - scoring_spec.set_order_by(ScoringSpecProto::Order::ASC); - SearchResultProto search_result_proto = icing.Search( - search_spec, scoring_spec, 
ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingDuplicateNamespaceShouldReturnError) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 2 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - // Specify "namespace1" twice. This should result in an error. 
- ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::NAMESPACE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace1"); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace2"); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace1"); - result_grouping = result_spec.add_result_groupings(); - entry = result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace1"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingDuplicateSchemaShouldReturnError) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 2 documents - SearchSpecProto search_spec; - 
search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - // Specify "Message" twice. This should result in an error. - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_schema("Message"); - entry = result_grouping->add_entry_groupings(); - entry->set_schema("nonexistentMessage"); - result_grouping = result_spec.add_result_groupings(); - result_grouping->set_max_results(1); - entry = result_grouping->add_entry_groupings(); - entry->set_schema("Message"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingDuplicateNamespaceAndSchemaSchemaShouldReturnError) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - 
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 2 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - // Specify "namespace1xMessage" twice. This should result in an error. - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace1"); - entry->set_schema("Message"); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace2"); - entry->set_schema("Message"); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace1"); - entry->set_schema("Message"); - result_grouping = result_spec.add_result_groupings(); - result_grouping->set_max_results(1); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace1"); - entry->set_schema("Message"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingNonPositiveMaxResultsShouldReturnError) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - 
DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace2", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 2 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - // Specify zero results. This should result in an error. - ResultSpecProto result_spec; - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(0); - entry->set_namespace_("namespace1"); - entry->set_schema("Message"); - result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace2"); - entry->set_schema("Message"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); - - // Specify negative results. This should result in an error. 
- result_spec.mutable_result_groupings(0)->set_max_results(-1); - EXPECT_THAT(search_result_proto.status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiNamespaceGrouping) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 documents and ensures the relationship in terms of document - // score is: document1 < document2 < document3 < document4 < document5 < - // document6 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace1", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace2", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(3) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document4 = - DocumentBuilder() - .SetKey("namespace2", "uri/4") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(4) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document5 = - DocumentBuilder() - .SetKey("namespace3", "uri/5") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(5) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document6 = - DocumentBuilder() - .SetKey("namespace3", "uri/6") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(6) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - 
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); - - // "m" will match all 6 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::NAMESPACE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace1"); - result_grouping = result_spec.add_result_groupings(); - result_grouping->set_max_results(2); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace2"); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace3"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - - // The last result (document1) in namespace "namespace1" should not be - // included. "namespace2" and "namespace3" are grouped together. So only the - // two highest scored documents between the two (both of which are in - // "namespace3") should be returned. 
- SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document6; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document5; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiSchemaGrouping) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetScore(1) - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .AddDocumentProperty("sender", DocumentBuilder() - .SetKey("namespace", "uri1-sender") - .SetSchema("Person") - .AddStringProperty("name", "foo") - 
.Build()) - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Message") - .SetScore(2) - .SetCreationTimestampMs(10) - .AddStringProperty("body", "fo") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace2", "uri3") - .SetSchema("Message") - .SetScore(3) - .SetCreationTimestampMs(10) - .AddStringProperty("body", "fo") - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - // "f" will match all 3 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("f"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_schema("Message"); - result_grouping = result_spec.add_result_groupings(); - result_grouping->set_max_results(1); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("Email"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - - // Each of the highest scored documents of schema type "Message" (document3) - // and "Email" (document1) should be returned. 
- SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document3; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document1; - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingMultiNamespaceAndSchemaGrouping) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 3 documents and ensures the relationship in terms of document - // score is: document1 < document2 < document3 < document4 < document5 < - // document6 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace1", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace2", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(3) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document4 = - DocumentBuilder() - .SetKey("namespace2", "uri/4") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(4) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document5 = - DocumentBuilder() - .SetKey("namespace3", "uri/5") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(5) - 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document6 = - DocumentBuilder() - .SetKey("namespace3", "uri/6") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(6) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); - - // "m" will match all 6 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace1"); - entry->set_schema("Message"); - result_grouping = result_spec.add_result_groupings(); - result_grouping->set_max_results(1); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace2"); - entry->set_schema("Message"); - result_grouping = result_spec.add_result_groupings(); - result_grouping->set_max_results(1); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("namespace3"); - entry->set_schema("Message"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - - // The three highest scored documents that fit the criteria of - // "namespace1xMessage" (document2), 
"namespace2xMessage" (document4), - // and "namespace3xMessage" (document6) should be returned. - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document6; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document4; - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingNonexistentNamespaceShouldBeIgnored) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace1", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 2 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::NAMESPACE); - 
ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace1"); - entry = result_grouping->add_entry_groupings(); - entry->set_namespace_("nonexistentNamespace"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - - // Only the top ranked document in "namespace" (document2), should be - // returned. The presence of "nonexistentNamespace" in the same result - // grouping should have no effect. - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingNonexistentSchemaShouldBeIgnored) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace1", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // "m" will match all 2 documents 
- SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_schema("Message"); - entry = result_grouping->add_entry_groupings(); - entry->set_schema("nonexistentMessage"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - - // Only the top ranked document in "Message" (document2), should be - // returned. The presence of "nonexistentMessage" in the same result - // grouping should have no effect. - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document2; - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SearchResultGroupingNonexistentNamespaceAndSchemaShouldBeIgnored) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates 2 documents and ensures the relationship in terms of document - // score is: document1 < document2 - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri/1") - .SetSchema("Message") - .AddStringProperty("body", "message1") - .SetScore(1) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - DocumentProto document2 = - 
DocumentBuilder() - .SetKey("namespace1", "uri/2") - .SetSchema("Message") - .AddStringProperty("body", "message2") - .SetScore(2) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace2", "uri/3") - .SetSchema("Message") - .AddStringProperty("body", "message3") - .SetScore(3) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - DocumentProto document4 = - DocumentBuilder() - .SetKey("namespace2", "uri/4") - .SetSchema("Message") - .AddStringProperty("body", "message4") - .SetScore(4) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - - // "m" will match all 2 documents - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("m"); - - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - - ResultSpecProto result_spec; - result_spec.set_result_group_type(ResultSpecProto::SCHEMA_TYPE); - ResultSpecProto::ResultGrouping* result_grouping = - result_spec.add_result_groupings(); - ResultSpecProto::ResultGrouping::Entry* entry = - result_grouping->add_entry_groupings(); - result_grouping->set_max_results(1); - entry->set_namespace_("namespace2"); - entry->set_schema("Message"); - entry = result_grouping->add_entry_groupings(); - entry->set_schema("namespace1"); - entry->set_schema("nonexistentMessage"); - - SearchResultProto search_result_proto = - icing.Search(search_spec, scoring_spec, result_spec); - - // Only the top ranked document in "namespace2xMessage" (document4), should be - // returned. 
The presence of "namespace1xnonexistentMessage" in the same - // result grouping should have no effect. If either the namespace or the - // schema type is nonexistent, the entire entry will be ignored. - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document4; - - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, - SetSchemaCanNotDetectPreviousSchemaWasLostWithoutDocuments) { - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - - // Make an incompatible schema, a previously OPTIONAL field is REQUIRED - SchemaProto incompatible_schema = schema; - incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality( - PropertyConfigProto::Cardinality::REQUIRED); - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - } // This should shut down IcingSearchEngine and persist anything it needs to - - ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str())); - - // Since we don't have any documents yet, we can't detect this edge-case. But - // it should be fine since there aren't any documents to be invalidated. 
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) { - SchemaProto schema; - auto type = schema.add_types(); - type->set_schema_type("Message"); - - auto body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - // Make an incompatible schema, a previously OPTIONAL field is REQUIRED - SchemaProto incompatible_schema = schema; - incompatible_schema.mutable_types(0)->mutable_properties(0)->set_cardinality( - PropertyConfigProto::Cardinality::REQUIRED); - - SearchSpecProto search_spec; - search_spec.set_query("message"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Can retrieve by namespace/uri - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - - ASSERT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Can search for it - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - 
*expected_search_result_proto.mutable_results()->Add()->mutable_document() = - CreateMessageDocument("namespace", "uri"); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); - } // This should shut down IcingSearchEngine and persist anything it needs to - - ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str())); - - // Setting the new, different schema will remove incompatible documents - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk()); - - // Can't retrieve by namespace/uri - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); - expected_get_result_proto.mutable_status()->set_message( - "Document (namespace, uri) not found."); - - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Can't search for it - SearchResultProto empty_result; - empty_result.mutable_status()->set_code(StatusProto::OK); - SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, - EqualsSearchResultIgnoreStatsAndScores(empty_result)); -} - -TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } // Destructing calls a 
PersistToDisk(FULL) - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - - // There should be no recovery since everything should be saved properly. - InitializeResultProto init_result = icing.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - - // Schema is still intact. - GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - - EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto)); - - // Documents are still intact. - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - - EXPECT_THAT( - icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Index is still intact. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. 
- - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); - - // Add schema and documents to our first icing1 instance. - IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); - EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); - - // Initialize a second icing2 instance which should have it's own memory - // space. If data from icing1 isn't being persisted to the files, then icing2 - // won't be able to see those changes. - IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - - // There should be no recovery since everything should be saved properly. - InitializeResultProto init_result = icing2.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - - // Schema is still intact. 
- GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - - EXPECT_THAT(icing2.GetSchema(), - EqualsProto(expected_get_schema_result_proto)); - - // Documents are still intact. - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_result_proto.mutable_document() = document; - - EXPECT_THAT( - icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()), - EqualsProto(expected_get_result_proto)); - - // Index is still intact. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing2.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) { - IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - DocumentProto document = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); - EXPECT_THAT( - icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(document)); - - // It's intentional that no PersistToDisk call is made before initializing a - // second instance of icing. 
- - IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing2.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::PARTIAL_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - - // The document shouldn't be found because we forgot to call - // PersistToDisk(LITE)! - EXPECT_THAT( - icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::NOT_FOUND)); - - // Searching also shouldn't get us anything because the index wasn't - // recovered. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. 
- - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - - SearchResultProto actual_results = - icing2.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) { - DocumentProto document = CreateMessageDocument("namespace", "uri"); - - IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache()); - EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk()); - EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk()); - EXPECT_THAT( - icing1.Get("namespace", "uri", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(document)); - - IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing2.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - - // A checksum mismatch gets reported as an IO error. The document store and - // index didn't have their derived files included in the checksum previously, - // so reinitializing will trigger a checksum mismatch. - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - - // Schema is still intact. 
- GetSchemaResultProto expected_get_schema_result_proto; - expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema(); - - EXPECT_THAT(icing2.GetSchema(), - EqualsProto(expected_get_schema_result_proto)); - - // The document should be found because we called PersistToDisk(LITE)! - EXPECT_THAT( - icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()) - .document(), - EqualsProto(document)); - - // Recovered index is still intact. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("message"); // Content in the Message document. - - SearchResultProto expected_search_result_proto; - expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - document; - - SearchResultProto actual_results = - icing2.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( - expected_search_result_proto)); -} - -TEST_F(IcingSearchEngineTest, ResetOk) { - SchemaProto message_schema = CreateMessageSchema(); - SchemaProto empty_schema = SchemaProto(message_schema); - empty_schema.clear_types(); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(message_schema).status(), ProtoIsOk()); - - int64_t empty_state_size = - filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str()); - - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Check that things have been added - EXPECT_THAT(filesystem()->GetDiskUsage(GetTestBaseDir().c_str()), - Gt(empty_state_size)); - - EXPECT_THAT(icing.Reset().status(), ProtoIsOk()); - - // Check that we're back to an 
empty state - EXPECT_EQ(filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str()), - empty_state_size); - - // Sanity check that we can still call other APIs. If things aren't cleared, - // then this should raise an error since the empty schema is incompatible with - // the old message_schema. - EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk()); -} - -TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesInternalError) { - auto mock_filesystem = std::make_unique(); - - // This fails IcingSearchEngine::Reset() with status code INTERNAL and leaves - // the IcingSearchEngine instance in an uninitialized state. - ON_CALL(*mock_filesystem, - DeleteDirectoryRecursively(StrEq(GetTestBaseDir().c_str()))) - .WillByDefault(Return(false)); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique(), - std::make_unique(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::INTERNAL)); - - GetResultProto expected_get_result_proto; - expected_get_result_proto.mutable_status()->set_code( - StatusProto::FAILED_PRECONDITION); - *expected_get_result_proto.mutable_document() = document; - EXPECT_THAT(icing - .Get(document.namespace_(), document.uri(), - GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); -} - -TEST_F(IcingSearchEngineTest, SnippetNormalization) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - 
.AddStringProperty("body", "MDI zurich Team Meeting") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "mdi Zürich Team Meeting") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("mdi Zürich"); - - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); - result_spec.mutable_snippet_spec()->set_num_to_snippet(2); - - SearchResultProto results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(results.status(), ProtoIsOk()); - ASSERT_THAT(results.results(), SizeIs(2)); - const DocumentProto& result_document_1 = results.results(0).document(); - const SnippetProto& result_snippet_1 = results.results(0).snippet(); - EXPECT_THAT(result_document_1, EqualsProto(document_two)); - EXPECT_THAT(result_snippet_1.entries(), SizeIs(1)); - EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body")); - std::string_view content = GetString( - &result_document_1, result_snippet_1.entries(0).property_name()); - EXPECT_THAT( - GetWindows(content, result_snippet_1.entries(0)), - ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting")); - EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)), - ElementsAre("mdi", "Zürich")); - - const DocumentProto& result_document_2 = results.results(1).document(); - const SnippetProto& result_snippet_2 = results.results(1).snippet(); - EXPECT_THAT(result_document_2, EqualsProto(document_one)); - EXPECT_THAT(result_snippet_2.entries(), SizeIs(1)); - 
EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body")); - content = GetString(&result_document_2, - result_snippet_2.entries(0).property_name()); - EXPECT_THAT( - GetWindows(content, result_snippet_2.entries(0)), - ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting")); - EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)), - ElementsAre("MDI", "zurich")); -} - -TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "MDI zurich Team Meeting") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "mdi Zürich Team Meeting") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("md Zür"); - - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); - result_spec.mutable_snippet_spec()->set_num_to_snippet(2); - - SearchResultProto results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(results.status(), ProtoIsOk()); - ASSERT_THAT(results.results(), SizeIs(2)); - const DocumentProto& result_document_1 = results.results(0).document(); - const SnippetProto& result_snippet_1 = results.results(0).snippet(); - EXPECT_THAT(result_document_1, 
EqualsProto(document_two)); - EXPECT_THAT(result_snippet_1.entries(), SizeIs(1)); - EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body")); - std::string_view content = GetString( - &result_document_1, result_snippet_1.entries(0).property_name()); - EXPECT_THAT( - GetWindows(content, result_snippet_1.entries(0)), - ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting")); - EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)), - ElementsAre("mdi", "Zürich")); - - const DocumentProto& result_document_2 = results.results(1).document(); - const SnippetProto& result_snippet_2 = results.results(1).snippet(); - EXPECT_THAT(result_document_2, EqualsProto(document_one)); - EXPECT_THAT(result_snippet_2.entries(), SizeIs(1)); - EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body")); - content = GetString(&result_document_2, - result_snippet_2.entries(0).property_name()); - EXPECT_THAT( - GetWindows(content, result_snippet_2.entries(0)), - ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting")); - EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)), - ElementsAre("MDI", "zurich")); -} - -TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); - - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Email") - .AddStringProperty("subject", "MDI zurich Team Meeting") - .AddStringProperty("body", "MDI zurich Team Meeting") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Email") - .AddStringProperty("subject", "MDI zurich trip") - .AddStringProperty("body", "Let's travel to zurich") - 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - auto search_spec = std::make_unique(); - search_spec->set_term_match_type(TermMatchType::PREFIX); - search_spec->set_query("body:Zür"); - - auto result_spec = std::make_unique(); - result_spec->set_num_per_page(1); - result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64); - result_spec->mutable_snippet_spec()->set_num_matches_per_property(10); - result_spec->mutable_snippet_spec()->set_num_to_snippet(10); - - auto scoring_spec = std::make_unique(); - *scoring_spec = GetDefaultScoringSpec(); - - SearchResultProto results = - icing.Search(*search_spec, *scoring_spec, *result_spec); - EXPECT_THAT(results.status(), ProtoIsOk()); - ASSERT_THAT(results.results(), SizeIs(1)); - - const DocumentProto& result_document_two = results.results(0).document(); - const SnippetProto& result_snippet_two = results.results(0).snippet(); - EXPECT_THAT(result_document_two, EqualsProto(document_two)); - EXPECT_THAT(result_snippet_two.entries(), SizeIs(1)); - EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body")); - std::string_view content = GetString( - &result_document_two, result_snippet_two.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)), - ElementsAre("Let's travel to zurich")); - EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)), - ElementsAre("zurich")); - - search_spec.reset(); - scoring_spec.reset(); - result_spec.reset(); - - results = icing.GetNextPage(results.next_page_token()); - EXPECT_THAT(results.status(), ProtoIsOk()); - ASSERT_THAT(results.results(), SizeIs(1)); - - const DocumentProto& result_document_one = results.results(0).document(); - const SnippetProto& result_snippet_one = results.results(0).snippet(); - EXPECT_THAT(result_document_one, EqualsProto(document_one)); - EXPECT_THAT(result_snippet_one.entries(), SizeIs(1)); - 
EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body")); - content = GetString(&result_document_one, - result_snippet_one.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)), - ElementsAre("MDI zurich Team Meeting")); - EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)), - ElementsAre("zurich")); -} - -TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - - SchemaProto email_schema = CreateMessageSchema(); - EXPECT_THAT(icing.SetSchema(email_schema).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.GetSchema().status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - - DocumentProto doc = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing.Put(doc).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing - .Get(doc.namespace_(), doc.uri(), - GetResultSpecProto::default_instance()) - .status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type()) - .status() - .code(), - Eq(StatusProto::FAILED_PRECONDITION)); - - SearchSpecProto search_spec = SearchSpecProto::default_instance(); - ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance(); - ResultSpecProto result_spec = ResultSpecProto::default_instance(); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - constexpr int kSomePageToken = 12; - 
EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash. - - EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); - EXPECT_THAT(icing.Optimize().status(), - ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); -} - -TEST_F(IcingSearchEngineTest, GetAllNamespaces) { - DocumentProto namespace1 = DocumentBuilder() - .SetKey("namespace1", "uri") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(1000) - .Build(); - DocumentProto namespace2_uri1 = DocumentBuilder() - .SetKey("namespace2", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(1000) - .Build(); - DocumentProto namespace2_uri2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(1000) - .Build(); - - DocumentProto namespace3 = DocumentBuilder() - .SetKey("namespace3", "uri") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .SetCreationTimestampMs(100) - .SetTtlMs(500) - .Build(); - { - // Some arbitrary time that's less than all the document's creation time + - // ttl - auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(500); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // No namespaces exist yet - GetAllNamespacesResultProto result = icing.GetAllNamespaces(); - EXPECT_THAT(result.status(), ProtoIsOk()); - EXPECT_THAT(result.namespaces(), IsEmpty()); - - ASSERT_THAT(icing.Put(namespace1).status(), 
ProtoIsOk()); - ASSERT_THAT(icing.Put(namespace2_uri1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(namespace2_uri2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(namespace3).status(), ProtoIsOk()); - - // All namespaces should exist now - result = icing.GetAllNamespaces(); - EXPECT_THAT(result.status(), ProtoIsOk()); - EXPECT_THAT(result.namespaces(), - UnorderedElementsAre("namespace1", "namespace2", "namespace3")); - - // After deleting namespace2_uri1 document, we still have namespace2_uri2 in - // "namespace2" so it should still show up - ASSERT_THAT(icing.Delete("namespace2", "uri1").status(), ProtoIsOk()); - - result = icing.GetAllNamespaces(); - EXPECT_THAT(result.status(), ProtoIsOk()); - EXPECT_THAT(result.namespaces(), - UnorderedElementsAre("namespace1", "namespace2", "namespace3")); - - // After deleting namespace2_uri2 document, we no longer have any documents - // in "namespace2" - ASSERT_THAT(icing.Delete("namespace2", "uri2").status(), ProtoIsOk()); - - result = icing.GetAllNamespaces(); - EXPECT_THAT(result.status(), ProtoIsOk()); - EXPECT_THAT(result.namespaces(), - UnorderedElementsAre("namespace1", "namespace3")); - } - - // We reinitialize here so we can feed in a fake clock this time - { - // Time needs to be past namespace3's creation time (100) + ttl (500) for it - // to count as "expired" - auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(1000); - - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Only valid document left is the one in "namespace1" - GetAllNamespacesResultProto result = icing.GetAllNamespaces(); - EXPECT_THAT(result.status(), ProtoIsOk()); - EXPECT_THAT(result.namespaces(), UnorderedElementsAre("namespace1")); - } -} - -TEST_F(IcingSearchEngineTest, Hyphens) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - 
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SchemaProto schema; - SchemaTypeConfigProto* type = schema.add_types(); - type->set_schema_type("MyType"); - PropertyConfigProto* prop = type->add_properties(); - prop->set_property_name("foo"); - prop->set_data_type(PropertyConfigProto::DataType::STRING); - prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - prop->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::EXACT_ONLY); - prop->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("MyType") - .AddStringProperty("foo", "foo bar-baz bat") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("MyType") - .AddStringProperty("foo", "bar for baz bat-man") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("foo:bar-baz"); - - ResultSpecProto result_spec; - SearchResultProto results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - - EXPECT_THAT(results.status(), ProtoIsOk()); - ASSERT_THAT(results.results(), SizeIs(2)); - EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); - EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); -} - -TEST_F(IcingSearchEngineTest, RestoreIndex) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", kIpsumText) - .Build(); - // 1. 
Create an index with a LiteIndex that will only allow one document - // before needing a merge. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Add two documents. These should get merged into the main index. - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = DocumentBuilder(document).SetUri("fake_type/1").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - // Add one document. This one should get remain in the lite index. - document = DocumentBuilder(document).SetUri("fake_type/2").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // 2. Delete the index file to trigger RestoreIndexIfNeeded. - std::string idx_subdir = GetIndexDir() + "/idx"; - filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); - - // 3. Create the index again. This should trigger index restoration. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("consectetur"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - SearchResultProto results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.next_page_token(), Eq(0)); - // All documents should be retrievable. 
- ASSERT_THAT(results.results(), SizeIs(3)); - EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2")); - EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1")); - EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0")); - } -} - -TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", kIpsumText) - .Build(); - // 1. Create an index with a LiteIndex that will only allow one document - // before needing a merge. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Add two documents. These should get merged into the main index. - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = DocumentBuilder(document).SetUri("fake_type/1").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - // Add one document. This one should get remain in the lite index. - document = DocumentBuilder(document).SetUri("fake_type/2").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // 2. 
Delete the last document from the document log - { - const std::string document_log_file = absl_ports::StrCat( - GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); - filesystem()->DeleteFile(document_log_file.c_str()); - ICING_ASSERT_OK_AND_ASSIGN( - auto create_result, - PortableFileBackedProtoLog::Create( - filesystem(), document_log_file.c_str(), - PortableFileBackedProtoLog::Options( - /*compress_in=*/true))); - std::unique_ptr> document_log = - std::move(create_result.proto_log); - - document = DocumentBuilder(document).SetUri("fake_type/0").Build(); - DocumentWrapper wrapper; - *wrapper.mutable_document() = document; - ASSERT_THAT(document_log->WriteProto(wrapper), IsOk()); - - document = DocumentBuilder(document).SetUri("fake_type/1").Build(); - *wrapper.mutable_document() = document; - ASSERT_THAT(document_log->WriteProto(wrapper), IsOk()); - } - - // 3. Create the index again. This should throw out the lite index and trigger - // index restoration which will only restore the two documents in the main - // index. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("consectetur"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - SearchResultProto results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.next_page_token(), Eq(0)); - // Only the documents that were in the main index should be retrievable. 
- ASSERT_THAT(results.results(), SizeIs(2)); - EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1")); - EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0")); - } -} - -TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", kIpsumText) - .Build(); - // 1. Create an index with a LiteIndex that will only allow one document - // before needing a merge. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Add two documents. These should get merged into the main index. - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = DocumentBuilder(document).SetUri("fake_type/1").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - // Add one document. This one should get remain in the lite index. - document = DocumentBuilder(document).SetUri("fake_type/2").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // 2. Delete the last two documents from the document log. 
- { - const std::string document_log_file = absl_ports::StrCat( - GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); - filesystem()->DeleteFile(document_log_file.c_str()); - ICING_ASSERT_OK_AND_ASSIGN( - auto create_result, - PortableFileBackedProtoLog::Create( - filesystem(), document_log_file.c_str(), - PortableFileBackedProtoLog::Options( - /*compress_in=*/true))); - std::unique_ptr> document_log = - std::move(create_result.proto_log); - - document = DocumentBuilder(document).SetUri("fake_type/0").Build(); - DocumentWrapper wrapper; - *wrapper.mutable_document() = document; - ASSERT_THAT(document_log->WriteProto(wrapper), IsOk()); - } - - // 3. Create the index again. This should throw out the lite and main index - // and trigger index restoration. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("consectetur"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - SearchResultProto results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.next_page_token(), Eq(0)); - // Only the first document should be retrievable. - ASSERT_THAT(results.results(), SizeIs(1)); - EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0")); - } -} - -TEST_F(IcingSearchEngineTest, - DocumentWithNoIndexedContentDoesntCauseRestoreIndex) { - // 1. Create an index with a single document in it that has no indexed - // content. - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Set a schema for a single type that has no indexed properties. 
- SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("unindexedField") - .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) - .SetCardinality(CARDINALITY_REQUIRED))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - // Add a document that contains no indexed content. - DocumentProto document = - DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("unindexedField", - "Don't you dare search over this!") - .Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // 2. Create the index again. This should NOT trigger a recovery of any kind. - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - } -} - -TEST_F(IcingSearchEngineTest, - DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) { - // 1. Create an index with a single document in it that has no valid indexed - // tokens in its content. - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Set a schema for a single type that has no indexed properties. - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Add a document that contains no valid indexed content - just punctuation. 
- DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", "?...!") - .Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // 2. Create the index again. This should NOT trigger a recovery of any kind. - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto init_result = icing.Initialize(); - EXPECT_THAT(init_result.status(), ProtoIsOk()); - EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - } -} - -TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", kIpsumText) - .Build(); - // 1. Create an index with a LiteIndex that will only allow one document - // before needing a merge. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - IcingSearchEngine icing(options, GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Add two documents. These should get merged into the main index. - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - document = DocumentBuilder(document).SetUri("fake_type/1").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - // Add one document. This one should get remain in the lite index. 
- document = DocumentBuilder(document).SetUri("fake_type/2").Build(); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // 2. Delete the index file to trigger RestoreIndexIfNeeded. - std::string idx_subdir = GetIndexDir() + "/idx"; - filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); - - // 3. Setup a mock filesystem to fail to grow the main index once. - bool has_failed_already = false; - auto open_write_lambda = [this, &has_failed_already](const char* filename) { - std::string main_lexicon_suffix = "/main-lexicon.prop.2"; - std::string filename_string(filename); - if (!has_failed_already && - filename_string.length() >= main_lexicon_suffix.length() && - filename_string.substr( - filename_string.length() - main_lexicon_suffix.length(), - main_lexicon_suffix.length()) == main_lexicon_suffix) { - has_failed_already = true; - return -1; - } - return this->filesystem()->OpenForWrite(filename); - }; - auto mock_icing_filesystem = std::make_unique(); - ON_CALL(*mock_icing_filesystem, OpenForWrite) - .WillByDefault(open_write_lambda); - - // 4. Create the index again. This should trigger index restoration. - { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_index_merge_size(document.ByteSizeLong()); - TestIcingSearchEngine icing(options, std::make_unique(), - std::move(mock_icing_filesystem), - std::make_unique(), - GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); - - SearchSpecProto search_spec; - search_spec.set_query("consectetur"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - SearchResultProto results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.next_page_token(), Eq(0)); - // Only the last document that was added should still be retrievable. 
- ASSERT_THAT(results.results(), SizeIs(1)); - EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2")); - } -} - -TEST_F(IcingSearchEngineTest, InitializeShouldLogFunctionLatency) { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10)); -} - -TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) { - DocumentProto document1 = DocumentBuilder() - .SetKey("icing", "fake_type/1") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("icing", "fake_type/2") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - - { - // Initialize and put a document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), - Eq(0)); - - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), - Eq(1)); - - // Put another document. 
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), - Eq(2)); - } -} - -TEST_F(IcingSearchEngineTest, - InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) { - // Even though the fake timer will return 10, all the latency numbers related - // to recovery / restoration should be 0 during the first-time initialization. - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); -} - -TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") 
- .AddStringProperty("body", "message body") - .Build(); - - { - // Initialize and put a document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - { - // Append a non-checksummed document. This will mess up the checksum of the - // proto log, forcing it to rewind and later return a DATA_LOSS error. - const std::string serialized_document = document.SerializeAsString(); - const std::string document_log_file = absl_ports::StrCat( - GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); - - int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str()); - filesystem()->PWrite(document_log_file.c_str(), file_size, - serialized_document.data(), - serialized_document.size()); - } - - { - // Document store will rewind to previous checkpoint. The cause should be - // DATA_LOSS and the data status should be PARTIAL_LOSS. 
- auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(10)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::PARTIAL_LOSS)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); - } -} - -TEST_F(IcingSearchEngineTest, - InitializeShouldLogRecoveryCauseCompleteDataLoss) { - DocumentProto document1 = DocumentBuilder() - .SetKey("icing", "fake_type/1") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - - const std::string document_log_file = absl_ports::StrCat( - GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); - int64_t corruptible_offset; - - { - // Initialize and put a document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // There's some space at the beginning of the file (e.g. header, kmagic, - // etc) that is necessary to initialize the FileBackedProtoLog. 
We can't - // corrupt that region, so we need to figure out the offset at which - // documents will be written to - which is the file size after - // initialization. - corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str()); - - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - } - - { - // "Corrupt" the content written in the log. Make the corrupt document - // smaller than our original one so we don't accidentally write past our - // file. - DocumentProto document = - DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); - std::string serialized_document = document.SerializeAsString(); - ASSERT_TRUE(filesystem()->PWrite( - document_log_file.c_str(), corruptible_offset, - serialized_document.data(), serialized_document.size())); - - PortableFileBackedProtoLog::Header header = - ReadDocumentLogHeader(*filesystem(), document_log_file); - - // Set dirty bit to true to reflect that something changed in the log. - header.SetDirtyFlag(true); - header.SetHeaderChecksum(header.CalculateHeaderChecksum()); - - WriteDocumentLogHeader(*filesystem(), document_log_file, header); - } - - { - // Document store will completely rewind. The cause should be DATA_LOSS and - // the data status should be COMPLETE_LOSS. 
- auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(10)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::COMPLETE_LOSS)); - // The complete rewind of ground truth causes us to clear the index, but - // that's not considered a restoration. - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); - } -} - -TEST_F(IcingSearchEngineTest, - InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - { - // Initialize and put a document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - { - // Delete the index file to trigger RestoreIndexIfNeeded. 
- std::string idx_subdir = GetIndexDir() + "/idx"; - filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); - } - - { - // Index is empty but ground truth is not. Index should be restored due to - // the inconsistency. - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .index_restoration_latency_ms(), - Eq(10)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); - } -} - -TEST_F(IcingSearchEngineTest, - InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - { - // Initialize and put one document. 
- IcingSearchEngine icing(options, GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - { - // Simulate a schema change where power is lost after the schema is written. - SchemaProto new_schema = - SchemaBuilder() - .AddType( - SchemaTypeConfigBuilder() - .SetType("Message") - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - // Write the marker file - std::string marker_filepath = - absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); - ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); - ASSERT_TRUE(sfd.is_valid()); - - // Write the new schema - FakeClock fake_clock; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr schema_store, - SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); - ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); - } - - { - // Both document store and index should be recovered from checksum mismatch. 
- auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .index_restoration_latency_ms(), - Eq(10)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(10)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); - } - - { - // No recovery should be needed. 
- auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); - } -} - -TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - { - // Initialize and put one document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // lambda to fail OpenForWrite on lite index hit buffer once. 
- bool has_failed_already = false; - auto open_write_lambda = [this, &has_failed_already](const char* filename) { - std::string lite_index_buffer_file_path = - absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb"); - std::string filename_string(filename); - if (!has_failed_already && filename_string == lite_index_buffer_file_path) { - has_failed_already = true; - return -1; - } - return this->filesystem()->OpenForWrite(filename); - }; - - auto mock_icing_filesystem = std::make_unique(); - // This fails Index::Create() once. - ON_CALL(*mock_icing_filesystem, OpenForWrite) - .WillByDefault(open_write_lambda); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::move(mock_icing_filesystem), - std::move(fake_clock), GetTestJniCache()); - - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_latency_ms(), - Eq(10)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); -} - -TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseDocStoreIOError) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - 
.SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - { - // Initialize and put one document. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - } - - // lambda to fail Read on document store header once. - bool has_failed_already = false; - auto read_lambda = [this, &has_failed_already](const char* filename, - void* buf, size_t buf_size) { - std::string document_store_header_file_path = - absl_ports::StrCat(GetDocumentDir(), "/document_store_header"); - std::string filename_string(filename); - if (!has_failed_already && - filename_string == document_store_header_file_path) { - has_failed_already = true; - return false; - } - return this->filesystem()->Read(filename, buf, buf_size); - }; - - auto mock_filesystem = std::make_unique(); - // This fails DocumentStore::InitializeDerivedFiles() once. 
- ON_CALL(*mock_filesystem, Read(A(), _, _)) - .WillByDefault(read_lambda); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::move(mock_filesystem), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(10)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(0)); -} - -TEST_F(IcingSearchEngineTest, - InitializeShouldLogRecoveryCauseSchemaStoreIOError) { - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - } - - { - // Delete the schema store header file to trigger an I/O error. 
- std::string schema_store_header_file_path = - GetSchemaDir() + "/schema_store_header"; - filesystem()->DeleteFile(schema_store_header_file_path.c_str()); - } - - { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_cause(), - Eq(InitializeStatsProto::IO_ERROR)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .schema_store_recovery_latency_ms(), - Eq(10)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .document_store_recovery_latency_ms(), - Eq(0)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().document_store_data_status(), - Eq(InitializeStatsProto::NO_DATA_LOSS)); - EXPECT_THAT( - initialize_result_proto.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); - EXPECT_THAT(initialize_result_proto.initialize_stats() - .index_restoration_latency_ms(), - Eq(0)); - } -} - -TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) { - { - // Initialize an empty storage. - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - // There should be 0 schema types. - EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), - Eq(0)); - - // Set a schema with one type config. 
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - // There should be 1 schema type. - EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), - Eq(1)); - - // Create and set a schema with two type configs: Email and Message. - SchemaProto schema = CreateEmailSchema(); - - auto type = schema.add_types(); - type->set_schema_type("Message"); - auto body = type->add_properties(); - body->set_property_name("body"); - body->set_data_type(PropertyConfigProto::DataType::STRING); - body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); - body->mutable_string_indexing_config()->set_term_match_type( - TermMatchType::PREFIX); - body->mutable_string_indexing_config()->set_tokenizer_type( - StringIndexingConfig::TokenizerType::PLAIN); - - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - } - - { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - InitializeResultProto initialize_result_proto = icing.Initialize(); - EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(), - Eq(2)); - } -} - -TEST_F(IcingSearchEngineTest, PutDocumentShouldLogFunctionLatency) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - PutResultProto 
put_result_proto = icing.Put(document); - EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10)); -} - -TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) { - DocumentProto document = - DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .AddStringProperty("body", "message body") - .Build(); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - PutResultProto put_result_proto = icing.Put(document); - EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(), - Eq(10)); - size_t document_size = put_result_proto.put_document_stats().document_size(); - EXPECT_THAT(document_size, Ge(document.ByteSizeLong())); - EXPECT_THAT(document_size, Le(document.ByteSizeLong() + - sizeof(DocumentProto::InternalFields))); -} - -TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) { - DocumentProto document = DocumentBuilder() - .SetKey("icing", "fake_type/0") - .SetSchema("Message") - .AddStringProperty("body", "message body") - .Build(); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - PutResultProto put_result_proto = icing.Put(document); - EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - 
EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10)); - // No merge should happen. - EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), - Eq(0)); - // The input document has 2 tokens. - EXPECT_THAT(put_result_proto.put_document_stats() - .tokenization_stats() - .num_tokens_indexed(), - Eq(2)); -} - -TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) { - DocumentProto document1 = DocumentBuilder() - .SetKey("icing", "fake_type/1") - .SetSchema("Message") - .AddStringProperty("body", kIpsumText) - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("icing", "fake_type/2") - .SetSchema("Message") - .AddStringProperty("body", kIpsumText) - .Build(); - - // Create an icing instance with index_merge_size = document1's size. - IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); - icing_options.set_index_merge_size(document1.ByteSizeLong()); - - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(10); - TestIcingSearchEngine icing(icing_options, std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - // Putting document2 should trigger an index merge. - PutResultProto put_result_proto = icing.Put(document2); - EXPECT_THAT(put_result_proto.status(), ProtoIsOk()); - EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(), - Eq(10)); -} - -TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // 1. 
Add two email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("subject", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .AddStringProperty("subject", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - // 2. Issue a query that will match those documents and use an empty field - // mask to request NO properties. - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("hello"); - - ResultSpecProto result_spec; - // Retrieve only one result at a time to make sure that projection works when - // retrieving all pages. - result_spec.set_num_per_page(1); - TypePropertyMask* email_field_mask = result_spec.add_type_property_masks(); - email_field_mask->set_schema_type("Email"); - email_field_mask->add_paths(""); - - SearchResultProto results = - icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(1)); - - // 3. Verify that the returned results contain no properties. 
- DocumentProto projected_document_two = DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .Build(); - EXPECT_THAT(results.results(0).document(), - EqualsProto(projected_document_two)); - - results = icing.GetNextPage(results.next_page_token()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(1)); - DocumentProto projected_document_one = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .Build(); - EXPECT_THAT(results.results(0).document(), - EqualsProto(projected_document_one)); -} - -TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // 1. Add two email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("subject", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .AddStringProperty("subject", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - // 2. Issue a query that will match those documents and request only - // 'sender.name' and 'subject' properties. - // Create all of search_spec, result_spec and scoring_spec as objects with - // scope that will end before the call to GetNextPage to ensure that the - // implementation isn't relying on references to any of them. - auto search_spec = std::make_unique(); - search_spec->set_term_match_type(TermMatchType::PREFIX); - search_spec->set_query("hello"); - - auto result_spec = std::make_unique(); - // Retrieve only one result at a time to make sure that projection works when - // retrieving all pages. - result_spec->set_num_per_page(1); - TypePropertyMask* email_field_mask = result_spec->add_type_property_masks(); - email_field_mask->set_schema_type("Email"); - email_field_mask->add_paths("sender.name"); - email_field_mask->add_paths("subject"); - - auto scoring_spec = std::make_unique(); - *scoring_spec = GetDefaultScoringSpec(); - SearchResultProto results = - icing.Search(*search_spec, *scoring_spec, *result_spec); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(1)); - - // 3. Verify that the first returned result only contains the 'sender.name' - // property. 
- DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .Build()) - .AddStringProperty("subject", "Goodnight Moon!") - .Build(); - EXPECT_THAT(results.results(0).document(), - EqualsProto(projected_document_two)); - - // 4. Now, delete all of the specs used in the search. GetNextPage should have - // no problem because it shouldn't be keeping any references to them. - search_spec.reset(); - result_spec.reset(); - scoring_spec.reset(); - - // 5. Verify that the second returned result only contains the 'sender.name' - // property. - results = icing.GetNextPage(results.next_page_token()); - EXPECT_THAT(results.status(), ProtoIsOk()); - EXPECT_THAT(results.results(), SizeIs(1)); - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .Build()) - .AddStringProperty("subject", "Hello World!") - .Build(); - EXPECT_THAT(results.results(0).document(), - EqualsProto(projected_document_one)); -} - -TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(5); - TestIcingSearchEngine icing(GetDefaultIcingOptions(), - std::make_unique(), - std::make_unique(), - std::move(fake_clock), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Creates and inserts 5 documents - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - DocumentProto 
document3 = CreateMessageDocument("namespace", "uri3"); - DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); - DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.add_namespace_filters("namespace"); - search_spec.add_schema_type_filters(document1.schema()); - search_spec.set_query("message"); - - ResultSpecProto result_spec; - result_spec.set_num_per_page(2); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); - result_spec.mutable_snippet_spec()->set_num_to_snippet(3); - - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by( - ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - - // Searches and gets the first page, 2 results with 2 snippets - SearchResultProto search_result = - icing.Search(search_spec, scoring_spec, result_spec); - ASSERT_THAT(search_result.status(), ProtoIsOk()); - ASSERT_THAT(search_result.results(), SizeIs(2)); - ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken)); - - // Check the stats - QueryStatsProto exp_stats; - exp_stats.set_query_length(7); - exp_stats.set_num_terms(1); - exp_stats.set_num_namespaces_filtered(1); - exp_stats.set_num_schema_types_filtered(1); - exp_stats.set_ranking_strategy( - ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - exp_stats.set_is_first_page(true); - exp_stats.set_requested_page_size(2); - exp_stats.set_num_results_returned_current_page(2); - exp_stats.set_num_documents_scored(5); - exp_stats.set_num_results_with_snippets(2); - 
exp_stats.set_latency_ms(5); - exp_stats.set_parse_query_latency_ms(5); - exp_stats.set_scoring_latency_ms(5); - exp_stats.set_ranking_latency_ms(5); - exp_stats.set_document_retrieval_latency_ms(5); - exp_stats.set_lock_acquisition_latency_ms(5); - EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); - - // Second page, 2 result with 1 snippet - search_result = icing.GetNextPage(search_result.next_page_token()); - ASSERT_THAT(search_result.status(), ProtoIsOk()); - ASSERT_THAT(search_result.results(), SizeIs(2)); - ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); - - exp_stats = QueryStatsProto(); - exp_stats.set_is_first_page(false); - exp_stats.set_requested_page_size(2); - exp_stats.set_num_results_returned_current_page(2); - exp_stats.set_num_results_with_snippets(1); - exp_stats.set_latency_ms(5); - exp_stats.set_document_retrieval_latency_ms(5); - exp_stats.set_lock_acquisition_latency_ms(5); - EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); - - // Third page, 1 result with 0 snippets - search_result = icing.GetNextPage(search_result.next_page_token()); - ASSERT_THAT(search_result.status(), ProtoIsOk()); - ASSERT_THAT(search_result.results(), SizeIs(1)); - ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); - - exp_stats = QueryStatsProto(); - exp_stats.set_is_first_page(false); - exp_stats.set_requested_page_size(2); - exp_stats.set_num_results_returned_current_page(1); - exp_stats.set_num_results_with_snippets(0); - exp_stats.set_latency_ms(5); - exp_stats.set_document_retrieval_latency_ms(5); - exp_stats.set_lock_acquisition_latency_ms(5); - EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); -} - -TEST_F(IcingSearchEngineTest, OptimizeStatsProtoTest) { - auto fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(5); - fake_clock->SetSystemTimeMilliseconds(10000); - auto icing = std::make_unique( - GetDefaultIcingOptions(), std::make_unique(), - 
std::make_unique(), std::move(fake_clock), - GetTestJniCache()); - ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Create three documents. - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - document2.set_creation_timestamp_ms(9000); - document2.set_ttl_ms(500); - DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); - ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk()); - - // Delete the first document. - ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(), - ProtoIsOk()); - ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); - - OptimizeStatsProto expected; - expected.set_latency_ms(5); - expected.set_document_store_optimize_latency_ms(5); - expected.set_index_restoration_latency_ms(5); - expected.set_num_original_documents(3); - expected.set_num_deleted_documents(1); - expected.set_num_expired_documents(1); - expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION); - - // Run Optimize - OptimizeResultProto result = icing->Optimize(); - // Depending on how many blocks the documents end up spread across, it's - // possible that Optimize can remove documents without shrinking storage. The - // first Optimize call will also write the OptimizeStatusProto for the first - // time which will take up 1 block. So make sure that before_size is no less - // than after_size - 1 block. 
- uint32_t page_size = getpagesize(); - EXPECT_THAT(result.optimize_stats().storage_size_before(), - Ge(result.optimize_stats().storage_size_after() - page_size)); - result.mutable_optimize_stats()->clear_storage_size_before(); - result.mutable_optimize_stats()->clear_storage_size_after(); - EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); - - fake_clock = std::make_unique(); - fake_clock->SetTimerElapsedMilliseconds(5); - fake_clock->SetSystemTimeMilliseconds(20000); - icing = std::make_unique( - GetDefaultIcingOptions(), std::make_unique(), - std::make_unique(), std::move(fake_clock), - GetTestJniCache()); - ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); - - expected = OptimizeStatsProto(); - expected.set_latency_ms(5); - expected.set_document_store_optimize_latency_ms(5); - expected.set_index_restoration_latency_ms(5); - expected.set_num_original_documents(1); - expected.set_num_deleted_documents(0); - expected.set_num_expired_documents(0); - expected.set_time_since_last_optimize_ms(10000); - expected.set_index_restoration_mode(OptimizeStatsProto::INDEX_TRANSLATION); - - // Run Optimize - result = icing->Optimize(); - EXPECT_THAT(result.optimize_stats().storage_size_before(), - Eq(result.optimize_stats().storage_size_after())); - result.mutable_optimize_stats()->clear_storage_size_before(); - result.mutable_optimize_stats()->clear_storage_size_after(); - EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); - - // Delete the last document. - ASSERT_THAT(icing->Delete(document3.namespace_(), document3.uri()).status(), - ProtoIsOk()); - - expected = OptimizeStatsProto(); - expected.set_latency_ms(5); - expected.set_document_store_optimize_latency_ms(5); - expected.set_index_restoration_latency_ms(5); - expected.set_num_original_documents(1); - expected.set_num_deleted_documents(1); - expected.set_num_expired_documents(0); - expected.set_time_since_last_optimize_ms(0); - // Should rebuild the index since all documents are removed. 
- expected.set_index_restoration_mode(OptimizeStatsProto::FULL_INDEX_REBUILD); - - // Run Optimize - result = icing->Optimize(); - EXPECT_THAT(result.optimize_stats().storage_size_before(), - Ge(result.optimize_stats().storage_size_after())); - result.mutable_optimize_stats()->clear_storage_size_before(); - result.mutable_optimize_stats()->clear_storage_size_after(); - EXPECT_THAT(result.optimize_stats(), EqualsProto(expected)); -} - -TEST_F(IcingSearchEngineTest, StorageInfoTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Create three documents. - DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); - DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); - DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - // Ensure that total_storage_size is set. All the other stats are covered by - // the classes that generate them. 
- StorageInfoResultProto result = icing.GetStorageInfo(); - EXPECT_THAT(result.status(), ProtoIsOk()); - EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0)); -} - -TEST_F(IcingSearchEngineTest, SnippetErrorTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty( - PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REPEATED))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetScore(10) - .SetSchema("Generic") - .AddStringProperty("subject", "I like cats", "I like dogs", - "I like birds", "I like fish") - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetScore(20) - .SetSchema("Generic") - .AddStringProperty("subject", "I like red", "I like green", - "I like blue", "I like yellow") - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri3") - .SetScore(5) - .SetSchema("Generic") - .AddStringProperty("subject", "I like cupcakes", "I like donuts", - "I like eclairs", "I like froyo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.add_schema_type_filters("Generic"); - search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - search_spec.set_query("like"); - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_num_to_snippet(2); - 
result_spec.mutable_snippet_spec()->set_num_matches_per_property(3); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4); - SearchResultProto search_results = - icing.Search(search_spec, scoring_spec, result_spec); - - ASSERT_THAT(search_results.results(), SizeIs(3)); - const SearchResultProto::ResultProto* result = &search_results.results(0); - EXPECT_THAT(result->document().uri(), Eq("uri2")); - ASSERT_THAT(result->snippet().entries(), SizeIs(3)); - const SnippetProto::EntryProto* entry = &result->snippet().entries(0); - EXPECT_THAT(entry->property_name(), "subject[0]"); - std::string_view content = GetString(&result->document(), "subject[0]"); - EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); - - entry = &result->snippet().entries(1); - EXPECT_THAT(entry->property_name(), "subject[1]"); - content = GetString(&result->document(), "subject[1]"); - EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); - - entry = &result->snippet().entries(2); - EXPECT_THAT(entry->property_name(), "subject[2]"); - content = GetString(&result->document(), "subject[2]"); - EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); - - result = &search_results.results(1); - EXPECT_THAT(result->document().uri(), Eq("uri1")); - ASSERT_THAT(result->snippet().entries(), SizeIs(3)); - entry = &result->snippet().entries(0); - EXPECT_THAT(entry->property_name(), "subject[0]"); - content = GetString(&result->document(), "subject[0]"); - EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); - - entry = &result->snippet().entries(1); - ASSERT_THAT(entry->property_name(), "subject[1]"); - content = GetString(&result->document(), "subject[1]"); - EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); - - entry = &result->snippet().entries(2); - ASSERT_THAT(entry->property_name(), "subject[2]"); - content = GetString(&result->document(), "subject[2]"); - EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like")); - - result = 
&search_results.results(2); - ASSERT_THAT(result->document().uri(), Eq("uri3")); - ASSERT_THAT(result->snippet().entries(), IsEmpty()); -} - -TEST_F(IcingSearchEngineTest, CJKSnippetTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // String: "我每天走路去上班。" - // ^ ^ ^ ^^ - // UTF8 idx: 0 3 9 15 18 - // UTF16 idx: 0 1 3 5 6 - // Breaks into segments: "我", "每天", "走路", "去", "上班" - constexpr std::string_view kChinese = "我每天走路去上班。"; - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", kChinese) - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - - // Search and request snippet matching but no windowing. - SearchSpecProto search_spec; - search_spec.set_query("走"); - search_spec.set_term_match_type(TERM_MATCH_PREFIX); - - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_num_to_snippet( - std::numeric_limits::max()); - result_spec.mutable_snippet_spec()->set_num_matches_per_property( - std::numeric_limits::max()); - - // Search and make sure that we got a single successful result - SearchResultProto search_results = icing.Search( - search_spec, ScoringSpecProto::default_instance(), result_spec); - ASSERT_THAT(search_results.status(), ProtoIsOk()); - ASSERT_THAT(search_results.results(), SizeIs(1)); - const SearchResultProto::ResultProto* result = &search_results.results(0); - EXPECT_THAT(result->document().uri(), Eq("uri1")); - - // Ensure that one and only one property was matched and it was "body" - ASSERT_THAT(result->snippet().entries(), SizeIs(1)); - const SnippetProto::EntryProto* entry = &result->snippet().entries(0); - EXPECT_THAT(entry->property_name(), Eq("body")); - - // Get the content for "subject" and see what the match is. 
- std::string_view content = GetString(&result->document(), "body"); - ASSERT_THAT(content, Eq(kChinese)); - - // Ensure that there is one and only one match within "subject" - ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); - const SnippetMatchProto& match_proto = entry->snippet_matches(0); - - EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9)); - EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6)); - std::string_view match = - content.substr(match_proto.exact_match_byte_position(), - match_proto.exact_match_byte_length()); - ASSERT_THAT(match, Eq("走路")); - - // Ensure that the utf-16 values are also as expected - EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3)); - EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); -} - -TEST_F(IcingSearchEngineTest, InvalidToEmptyQueryTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // String: "Luca Brasi sleeps with the 🐟🐟🐟." - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // UTF8 idx: 0 5 11 18 23 27 3135 39 - // UTF16 idx: 0 5 11 18 23 27 2931 33 - // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟" - // and "🐟". - constexpr std::string_view kSicilianMessage = - "Luca Brasi sleeps with the 🐟🐟🐟."; - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", kSicilianMessage) - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "Some other content.") - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - // Search and request snippet matching but no windowing. 
- SearchSpecProto search_spec; - search_spec.set_query("?"); - search_spec.set_term_match_type(TERM_MATCH_PREFIX); - ScoringSpecProto scoring_spec; - ResultSpecProto result_spec; - - // Search and make sure that we got a single successful result - SearchResultProto search_results = - icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query("。"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query("-"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query(":"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query("OR"); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); - - search_spec.set_query(" "); - search_results = icing.Search(search_spec, scoring_spec, result_spec); - EXPECT_THAT(search_results.status(), ProtoIsOk()); - EXPECT_THAT(search_results.results(), SizeIs(2)); -} - -TEST_F(IcingSearchEngineTest, EmojiSnippetTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // String: "Luca Brasi sleeps with the 🐟🐟🐟." - // ^ ^ ^ ^ ^ ^ ^ ^ ^ - // UTF8 idx: 0 5 11 18 23 27 3135 39 - // UTF16 idx: 0 5 11 18 23 27 2931 33 - // Breaks into segments: "Luca", "Brasi", "sleeps", "with", "the", "🐟", "🐟" - // and "🐟". 
- constexpr std::string_view kSicilianMessage = - "Luca Brasi sleeps with the 🐟🐟🐟."; - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", kSicilianMessage) - .Build(); - ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Message") - .AddStringProperty("body", "Some other content.") - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - // Search and request snippet matching but no windowing. - SearchSpecProto search_spec; - search_spec.set_query("🐟"); - search_spec.set_term_match_type(TERM_MATCH_PREFIX); - - ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_num_to_snippet(1); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); - - // Search and make sure that we got a single successful result - SearchResultProto search_results = icing.Search( - search_spec, ScoringSpecProto::default_instance(), result_spec); - ASSERT_THAT(search_results.status(), ProtoIsOk()); - ASSERT_THAT(search_results.results(), SizeIs(1)); - const SearchResultProto::ResultProto* result = &search_results.results(0); - EXPECT_THAT(result->document().uri(), Eq("uri1")); - - // Ensure that one and only one property was matched and it was "body" - ASSERT_THAT(result->snippet().entries(), SizeIs(1)); - const SnippetProto::EntryProto* entry = &result->snippet().entries(0); - EXPECT_THAT(entry->property_name(), Eq("body")); - - // Get the content for "subject" and see what the match is. 
- std::string_view content = GetString(&result->document(), "body"); - ASSERT_THAT(content, Eq(kSicilianMessage)); - - // Ensure that there is one and only one match within "subject" - ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); - const SnippetMatchProto& match_proto = entry->snippet_matches(0); - - EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(27)); - EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(4)); - std::string_view match = - content.substr(match_proto.exact_match_byte_position(), - match_proto.exact_match_byte_length()); - ASSERT_THAT(match, Eq("🐟")); - - // Ensure that the utf-16 values are also as expected - EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(27)); - EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2)); -} - -TEST_F(IcingSearchEngineTest, PutDocumentIndexFailureDeletion) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - - // Testing has shown that adding ~600,000 terms generated this way will - // fill up the hit buffer. - std::vector terms = GenerateUniqueTerms(600000); - std::string content = absl_ports::StrJoin(terms, " "); - DocumentProto document = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Message") - .AddStringProperty("body", "foo " + content) - .Build(); - // We failed to add the document to the index fully. This means that we should - // reject the document from Icing entirely. - ASSERT_THAT(icing.Put(document).status(), - ProtoStatusIs(StatusProto::OUT_OF_SPACE)); - - // Make sure that the document isn't searchable. 
- SearchSpecProto search_spec; - search_spec.set_query("foo"); - search_spec.set_term_match_type(TERM_MATCH_PREFIX); - - SearchResultProto search_results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - ASSERT_THAT(search_results.status(), ProtoIsOk()); - ASSERT_THAT(search_results.results(), IsEmpty()); - - // Make sure that the document isn't retrievable. - GetResultProto get_result = - icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()); - ASSERT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // Creates and inserts 6 documents, and index 6 termSix, 5 termFive, 4 - // termFour, 3 termThree, 2 termTwo and one termOne. 
- DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty( - "subject", "termOne termTwo termThree termFour termFive termSix") - .Build(); - DocumentProto document2 = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", - "termTwo termThree termFour termFive termSix") - .Build(); - DocumentProto document3 = - DocumentBuilder() - .SetKey("namespace", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termThree termFour termFive termSix") - .Build(); - DocumentProto document4 = - DocumentBuilder() - .SetKey("namespace", "uri4") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termFour termFive termSix") - .Build(); - DocumentProto document5 = - DocumentBuilder() - .SetKey("namespace", "uri5") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termFive termSix") - .Build(); - DocumentProto document6 = DocumentBuilder() - .SetKey("namespace", "uri6") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termSix") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("t"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - // Query all suggestions, and they will be 
ranked. - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions().at(0).query(), "termsix"); - ASSERT_THAT(response.suggestions().at(1).query(), "termfive"); - ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); - ASSERT_THAT(response.suggestions().at(3).query(), "termthree"); - ASSERT_THAT(response.suggestions().at(4).query(), "termtwo"); - ASSERT_THAT(response.suggestions().at(5).query(), "termone"); - - // Query first three suggestions, and they will be ranked. - suggestion_spec.set_num_to_return(3); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions().at(0).query(), "termsix"); - ASSERT_THAT(response.suggestions().at(1).query(), "termfive"); - ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_ShouldReturnInOneNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace1 has 2 results. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_ShouldReturnInMultipleNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fo") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace3", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace2 and namespace3 has 2 results. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace2"); - suggestion_spec.add_namespace_filters("namespace3"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NamespaceNotFound) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fo") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // Search for non-exist namespace3 - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace3"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - 
EXPECT_THAT(response.status().code(), Eq(StatusProto::OK)); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - // Index 4 documents, - // namespace1 has 2 hit2 for term one - // namespace2 has 2 hit2 for term two and 1 hit for term one. - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termone") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termone") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termone termtwo") - .Build(); - DocumentProto document4 = DocumentBuilder() - .SetKey("namespace2", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "termtwo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionTermOne; - suggestionTermOne.set_query("termone"); - SuggestionResponse::Suggestion suggestionTermTwo; - suggestionTermTwo.set_query("termtwo"); - - // only search suggestion for namespace2. The correctly order should be - // {"termtwo", "termone"}. If we're not filtering out namespace1 when - // calculating our score, then it will be {"termone", "termtwo"}. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("t"); - suggestion_spec.add_namespace_filters("namespace2"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - ElementsAre(EqualsProto(suggestionTermTwo), - EqualsProto(suggestionTermOne))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DeletionTest) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace1 has this suggestion - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); 
- ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // namespace2 has this suggestion - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // delete document from namespace 1 - EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk()); - - // Now namespace1 will return empty - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace1"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); - - // namespace2 still has this suggestion, so we can prove the reason of - // namespace 1 cannot find it is we filter it out, not it doesn't exist. 
- suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ShouldReturnInOneDocument) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - - // Only search in namespace1,uri1 - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - NamespaceDocumentUriGroup* namespace1_uri1 = - suggestion_spec.add_document_uri_filters(); - namespace1_uri1->set_namespace_("namespace1"); - namespace1_uri1->add_document_uris("uri1"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // Only search 
in namespace1,uri2 - suggestion_spec.clear_document_uri_filters(); - NamespaceDocumentUriGroup* namespace1_uri2 = - suggestion_spec.add_document_uri_filters(); - namespace1_uri2->set_namespace_("namespace1"); - namespace1_uri2->add_document_uris("uri2"); - - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo))); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_ShouldReturnInMultipleDocument) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace1", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - - // Only search document in namespace1,uri1 and namespace2,uri2 - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - 
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - NamespaceDocumentUriGroup* namespace1_uri1_uri2 = - suggestion_spec.add_document_uri_filters(); - namespace1_uri1_uri2->set_namespace_("namespace1"); - namespace1_uri1_uri2->add_document_uris("uri1"); - namespace1_uri1_uri2->add_document_uris("uri2"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool), - EqualsProto(suggestionFoo))); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_ShouldReturnInDesiredDocumentAndNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - DocumentProto document3 = DocumentBuilder() - .SetKey("namespace3", "uri3") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - - // Only search document in namespace1,uri1 and all documents under namespace2 - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - 
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.add_namespace_filters("namespace2"); - NamespaceDocumentUriGroup* namespace1_uri1 = - suggestion_spec.add_document_uri_filters(); - namespace1_uri1->set_namespace_("namespace1"); - namespace1_uri1->add_document_uris("uri1"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool), - EqualsProto(suggestionFoo))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DocumentIdDoesntExist) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // Search for a non-exist document id : namespace3,uri3 - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - suggestion_spec.add_namespace_filters("namespace3"); - 
NamespaceDocumentUriGroup* namespace3_uri3 = - suggestion_spec.add_document_uri_filters(); - namespace3_uri3->set_namespace_("namespace3"); - namespace3_uri3->add_document_uris("uri3"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_DocumentIdFilterDoesntMatchNamespaceFilter) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "foo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - // Search for the document namespace1,uri1 with namespace filter in - // namespace2. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - NamespaceDocumentUriGroup* namespace1_uri1 = - suggestion_spec.add_document_uri_filters(); - namespace1_uri1->set_namespace_("namespace1"); - namespace1_uri1->add_document_uris("uri1"); - suggestion_spec.add_namespace_filters("namespace2"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_EmptyDocumentIdInNamespace) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - // Give empty document uris in namespace 1 - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - NamespaceDocumentUriGroup* namespace1_uri1 = - suggestion_spec.add_document_uri_filters(); - namespace1_uri1->set_namespace_("namespace1"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, - 
SearchSuggestionsTest_ShouldReturnInDesiredSchemaType) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .AddDocumentProperty("sender", DocumentBuilder() - .SetKey("namespace", "uri1-sender") - .SetSchema("Person") - .AddStringProperty("name", "foo") - .Build()) - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Message") - .SetCreationTimestampMs(10) - .AddStringProperty("body", "fo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - - SuggestionSpecProto suggestion_spec; - 
suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - suggestion_spec.add_schema_type_filters("Email"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_SchemaTypeNotFound) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .SetCreationTimestampMs(10) - .AddStringProperty("body", "fo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - suggestion_spec.add_schema_type_filters("Email"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); -} - -TEST_F(IcingSearchEngineTest, - 
SearchSuggestionsTest_ShouldReturnInDesiredProperty) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri1-sender") - .SetSchema("Person") - .AddStringProperty("name", "foo") - .AddStringProperty("emailAddress", "fo") - .Build()) - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - // Only search in subject. 
- TypePropertyMask* mask = suggestion_spec.add_type_property_filters(); - mask->set_schema_type("Email"); - mask->add_paths("subject"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - // Search in subject and sender.name - suggestion_spec.clear_type_property_filters(); - mask = suggestion_spec.add_type_property_filters(); - mask->set_schema_type("Email"); - mask->add_paths("subject"); - mask->add_paths("sender.name"); - - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFool))); -} - -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_NestedPropertyReturnNothing) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .AddDocumentProperty("sender", DocumentBuilder() - .SetKey("namespace", "uri1-sender") - .SetSchema("Person") - .AddStringProperty("name", "foo") - .Build()) - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - // Only search in Person.name. 
- suggestion_spec.add_schema_type_filters("Person"); - TypePropertyMask* mask = suggestion_spec.add_type_property_filters(); - mask->set_schema_type("Person"); - mask->add_paths("name"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); -} +TEST_F(IcingSearchEngineTest, ResetOk) { + SchemaProto message_schema = CreateMessageSchema(); + SchemaProto empty_schema = SchemaProto(message_schema); + empty_schema.clear_types(); -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_PropertyFilterAndSchemaFilter) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(message_schema).status(), ProtoIsOk()); - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(10) - .AddStringProperty("subject", "fool") - .AddDocumentProperty("sender", DocumentBuilder() - 
.SetKey("namespace", "uri1-sender") - .SetSchema("Person") - .AddStringProperty("name", "foo") - .Build()) - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace1", "uri2") - .SetSchema("Message") - .SetCreationTimestampMs(10) - .AddStringProperty("body", "fo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + int64_t empty_state_size = + filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str()); - SuggestionResponse::Suggestion suggestionFoo; - suggestionFoo.set_query("foo"); - SuggestionResponse::Suggestion suggestionFo; - suggestionFo.set_query("fo"); - - // Search in sender.name of Email and everything in Message. - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - suggestion_spec.add_schema_type_filters("Email"); - suggestion_spec.add_schema_type_filters("Message"); - TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters(); - mask1->set_schema_type("Email"); - mask1->add_paths("sender.name"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFoo), - EqualsProto(suggestionFo))); -} + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); -TEST_F(IcingSearchEngineTest, - SearchSuggestionsTest_PropertyFilterNotMatchSchemaFilter) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - 
.AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( - PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", - /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .SetCreationTimestampMs(10) - .AddStringProperty("body", "fo") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + // Check that things have been added + EXPECT_THAT(filesystem()->GetDiskUsage(GetTestBaseDir().c_str()), + Gt(empty_state_size)); + + EXPECT_THAT(icing.Reset().status(), ProtoIsOk()); + + // Check that we're back to an empty state + EXPECT_EQ(filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str()), + empty_state_size); - // Search in sender.name of Email but schema type is Message. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - suggestion_spec.add_schema_type_filters("Message"); - TypePropertyMask* mask1 = suggestion_spec.add_type_property_filters(); - mask1->set_schema_type("Email"); - mask1->add_paths("sender.name"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - EXPECT_THAT(response.status().code(), Eq(StatusProto::INVALID_ARGUMENT)); + // Sanity check that we can still call other APIs. If things aren't cleared, + // then this should raise an error since the empty schema is incompatible with + // the old message_schema. + EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk()); } -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_OrderByTermFrequency) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); +TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesInternalError) { + auto mock_filesystem = std::make_unique(); + + // This fails IcingSearchEngine::Reset() with status code INTERNAL and leaves + // the IcingSearchEngine instance in an uninitialized state. 
+ ON_CALL(*mock_filesystem, + DeleteDirectoryRecursively(StrEq(GetTestBaseDir().c_str()))) + .WillByDefault(Return(false)); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::move(mock_filesystem), + std::make_unique(), + std::make_unique(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( - PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_REQUIRED))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - DocumentProto document1 = - DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Message") - .SetCreationTimestampMs(10) - .AddStringProperty( - "body", "termthree termthree termthree termtwo termtwo termone") - .Build(); - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + DocumentProto document = CreateMessageDocument("namespace", "uri"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::INTERNAL)); - // Search in sender.name of Email but schema type is Message. 
- SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("t"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::EXACT_ONLY); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY); - - SuggestionResponse::Suggestion suggestionTermOne; - suggestionTermOne.set_query("termone"); - SuggestionResponse::Suggestion suggestionTermTwo; - suggestionTermTwo.set_query("termtwo"); - SuggestionResponse::Suggestion suggestionTermThree; - suggestionTermThree.set_query("termthree"); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - ElementsAre(EqualsProto(suggestionTermThree), - EqualsProto(suggestionTermTwo), - EqualsProto(suggestionTermOne))); + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code( + StatusProto::FAILED_PRECONDITION); + *expected_get_result_proto.mutable_document() = document; + EXPECT_THAT(icing + .Get(document.namespace_(), document.uri(), + GetResultSpecProto::default_instance()) + .status(), + ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); } -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ExpiredTest) { - DocumentProto document1 = DocumentBuilder() - .SetKey("namespace1", "uri1") - .SetSchema("Email") - .SetCreationTimestampMs(100) - .SetTtlMs(500) - .AddStringProperty("subject", "fool") - .Build(); - DocumentProto document2 = DocumentBuilder() - .SetKey("namespace2", "uri2") - .SetSchema("Email") - .SetCreationTimestampMs(100) - .SetTtlMs(1000) - .AddStringProperty("subject", "fool") - .Build(); +TEST_F(IcingSearchEngineTest, GetAllNamespaces) { + DocumentProto namespace1 = DocumentBuilder() + .SetKey("namespace1", "uri") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + 
.Build(); + DocumentProto namespace2_uri1 = DocumentBuilder() + .SetKey("namespace2", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + DocumentProto namespace2_uri2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + + DocumentProto namespace3 = DocumentBuilder() + .SetKey("namespace3", "uri") + .SetSchema("Message") + .AddStringProperty("body", "message body") + .SetCreationTimestampMs(100) + .SetTtlMs(500) + .Build(); { + // Some arbitrary time that's less than all the document's creation time + + // ttl auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(400); + fake_clock->SetSystemTimeMilliseconds(500); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), std::make_unique(), std::move(fake_clock), GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), - ProtoIsOk()); - - ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); - - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - // namespace1 has this suggestion - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); - - 
// namespace2 has this suggestion - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); + + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // No namespaces exist yet + GetAllNamespacesResultProto result = icing.GetAllNamespaces(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.namespaces(), IsEmpty()); + + ASSERT_THAT(icing.Put(namespace1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(namespace2_uri1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(namespace2_uri2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(namespace3).status(), ProtoIsOk()); + + // All namespaces should exist now + result = icing.GetAllNamespaces(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.namespaces(), + UnorderedElementsAre("namespace1", "namespace2", "namespace3")); + + // After deleting namespace2_uri1 document, we still have namespace2_uri2 in + // "namespace2" so it should still show up + ASSERT_THAT(icing.Delete("namespace2", "uri1").status(), ProtoIsOk()); + + result = icing.GetAllNamespaces(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.namespaces(), + UnorderedElementsAre("namespace1", "namespace2", "namespace3")); + + // After deleting namespace2_uri2 document, we no longer have any documents + // in "namespace2" + ASSERT_THAT(icing.Delete("namespace2", "uri2").status(), ProtoIsOk()); + + result = icing.GetAllNamespaces(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.namespaces(), + UnorderedElementsAre("namespace1", "namespace3")); } + // We reinitialize here so we can feed in a fake clock this time { - // Time needs to be past document1 creation time (100) + ttl (500) 
for it - // to count as "expired". document2 is not expired since its ttl is 1000. + // Time needs to be past namespace3's creation time (100) + ttl (500) for it + // to count as "expired" auto fake_clock = std::make_unique(); - fake_clock->SetSystemTimeMilliseconds(800); + fake_clock->SetSystemTimeMilliseconds(1000); TestIcingSearchEngine icing(GetDefaultIcingOptions(), std::make_unique(), @@ -10193,64 +870,31 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ExpiredTest) { std::move(fake_clock), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("f"); - suggestion_spec.add_namespace_filters("namespace1"); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - // Now namespace1 will return empty - suggestion_spec.clear_namespace_filters(); - suggestion_spec.add_namespace_filters("namespace1"); - SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), IsEmpty()); - - // namespace2 still has this suggestion - SuggestionResponse::Suggestion suggestionFool; - suggestionFool.set_query("fool"); - - suggestion_spec.add_namespace_filters("namespace2"); - response = icing.SearchSuggestions(suggestion_spec); - ASSERT_THAT(response.status(), ProtoIsOk()); - ASSERT_THAT(response.suggestions(), - UnorderedElementsAre(EqualsProto(suggestionFool))); + // Only valid document left is the one in "namespace1" + GetAllNamespacesResultProto result = icing.GetAllNamespaces(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.namespaces(), UnorderedElementsAre("namespace1")); } } -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_emptyPrefix) { - IcingSearchEngine 
icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix(""); - suggestion_spec.set_num_to_return(10); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); - - ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) { +TEST_F(IcingSearchEngineTest, StorageInfoTest) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); - SuggestionSpecProto suggestion_spec; - suggestion_spec.set_prefix("prefix"); - suggestion_spec.set_num_to_return(0); - suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( - TermMatchType::PREFIX); - suggestion_spec.mutable_scoring_spec()->set_rank_by( - SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT); + // Create three documents. + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); - ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); + // Ensure that total_storage_size is set. All the other stats are covered by + // the classes that generate them. 
+ StorageInfoResultProto result = icing.GetStorageInfo(); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0)); } TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityBasicSucceeds) { @@ -10328,539 +972,6 @@ TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) { ASSERT_THAT(result.status(), ProtoIsOk()); } -TEST_F(IcingSearchEngineTest, IcingShouldWorkFor64Sections) { - // Create a schema with 64 sections - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - // Person has 4 sections. - .SetType("Person") - .AddProperty(PropertyConfigBuilder() - .SetName("firstName") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("lastName") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("emailAddress") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("phoneNumber") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder() - // Email has 16 sections. 
- .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("date") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("time") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("receiver") - .SetDataTypeDocument( - "Person", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("cc") - .SetDataTypeDocument( - "Person", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_REPEATED))) - .AddType(SchemaTypeConfigBuilder() - // EmailCollection has 64 sections. 
- .SetType("EmailCollection") - .AddProperty( - PropertyConfigBuilder() - .SetName("email1") - .SetDataTypeDocument( - "Email", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("email2") - .SetDataTypeDocument( - "Email", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("email3") - .SetDataTypeDocument( - "Email", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("email4") - .SetDataTypeDocument( - "Email", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - DocumentProto person1 = - DocumentBuilder() - .SetKey("namespace", "person1") - .SetSchema("Person") - .AddStringProperty("firstName", "first1") - .AddStringProperty("lastName", "last1") - .AddStringProperty("emailAddress", "email1@gmail.com") - .AddStringProperty("phoneNumber", "000-000-001") - .Build(); - DocumentProto person2 = - DocumentBuilder() - .SetKey("namespace", "person2") - .SetSchema("Person") - .AddStringProperty("firstName", "first2") - .AddStringProperty("lastName", "last2") - .AddStringProperty("emailAddress", "email2@gmail.com") - .AddStringProperty("phoneNumber", "000-000-002") - .Build(); - DocumentProto person3 = - DocumentBuilder() - .SetKey("namespace", "person3") - .SetSchema("Person") - .AddStringProperty("firstName", "first3") - .AddStringProperty("lastName", "last3") - .AddStringProperty("emailAddress", "email3@gmail.com") - .AddStringProperty("phoneNumber", "000-000-003") - .Build(); - DocumentProto email1 = DocumentBuilder() - .SetKey("namespace", "email1") - .SetSchema("Email") - .AddStringProperty("body", "test body") - .AddStringProperty("subject", "test subject") - .AddStringProperty("date", "2022-08-01") - .AddStringProperty("time", "1:00 PM") - .AddDocumentProperty("sender", person1) - 
.AddDocumentProperty("receiver", person2) - .AddDocumentProperty("cc", person3) - .Build(); - DocumentProto email2 = DocumentBuilder() - .SetKey("namespace", "email2") - .SetSchema("Email") - .AddStringProperty("body", "test body") - .AddStringProperty("subject", "test subject") - .AddStringProperty("date", "2022-08-02") - .AddStringProperty("time", "2:00 PM") - .AddDocumentProperty("sender", person2) - .AddDocumentProperty("receiver", person1) - .AddDocumentProperty("cc", person3) - .Build(); - DocumentProto email3 = DocumentBuilder() - .SetKey("namespace", "email3") - .SetSchema("Email") - .AddStringProperty("body", "test body") - .AddStringProperty("subject", "test subject") - .AddStringProperty("date", "2022-08-03") - .AddStringProperty("time", "3:00 PM") - .AddDocumentProperty("sender", person3) - .AddDocumentProperty("receiver", person1) - .AddDocumentProperty("cc", person2) - .Build(); - DocumentProto email4 = DocumentBuilder() - .SetKey("namespace", "email4") - .SetSchema("Email") - .AddStringProperty("body", "test body") - .AddStringProperty("subject", "test subject") - .AddStringProperty("date", "2022-08-04") - .AddStringProperty("time", "4:00 PM") - .AddDocumentProperty("sender", person3) - .AddDocumentProperty("receiver", person2) - .AddDocumentProperty("cc", person1) - .Build(); - DocumentProto email_collection = - DocumentBuilder() - .SetKey("namespace", "email_collection") - .SetSchema("EmailCollection") - .AddDocumentProperty("email1", email1) - .AddDocumentProperty("email2", email2) - .AddDocumentProperty("email3", email3) - .AddDocumentProperty("email4", email4) - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .Build(); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email_collection).status(), ProtoIsOk()); - - const std::vector query_terms = { - "first1", "last2", 
"email3@gmail.com", "000-000-001", - "body", "subject", "2022-08-02", "3\\:00"}; - SearchResultProto expected_document; - expected_document.mutable_status()->set_code(StatusProto::OK); - *expected_document.mutable_results()->Add()->mutable_document() = - email_collection; - for (const std::string& query_term : query_terms) { - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query(query_term); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(expected_document)); - } - - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("foo"); - SearchResultProto expected_no_documents; - expected_no_documents.mutable_status()->set_code(StatusProto::OK); - SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); - EXPECT_THAT(actual_results, - EqualsSearchResultIgnoreStatsAndScores(expected_no_documents)); -} - -TEST_F(IcingSearchEngineTest, JoinByQualifiedId) { - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Person") - .AddProperty(PropertyConfigBuilder() - .SetName("firstName") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("lastName") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("emailAddress") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("subject") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - 
.SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("personQualifiedId") - .SetDataTypeJoinableString( - JOINABLE_VALUE_TYPE_QUALIFIED_ID) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - - DocumentProto person1 = - DocumentBuilder() - .SetKey("pkg$db/namespace", "person1") - .SetSchema("Person") - .AddStringProperty("firstName", "first1") - .AddStringProperty("lastName", "last1") - .AddStringProperty("emailAddress", "email1@gmail.com") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .SetScore(1) - .Build(); - DocumentProto person2 = - DocumentBuilder() - .SetKey("pkg$db/namespace", "person2") - .SetSchema("Person") - .AddStringProperty("firstName", "first2") - .AddStringProperty("lastName", "last2") - .AddStringProperty("emailAddress", "email2@gmail.com") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .SetScore(2) - .Build(); - DocumentProto person3 = - DocumentBuilder() - .SetKey(R"(pkg$db/name#space\\)", "person3") - .SetSchema("Person") - .AddStringProperty("firstName", "first3") - .AddStringProperty("lastName", "last3") - .AddStringProperty("emailAddress", "email3@gmail.com") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .SetScore(3) - .Build(); - - DocumentProto email1 = - DocumentBuilder() - .SetKey("namespace", "email1") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 1") - .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .SetScore(3) - .Build(); - DocumentProto email2 = - DocumentBuilder() - .SetKey("namespace", "email2") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 2") - .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .SetScore(2) - .Build(); - DocumentProto email3 = - DocumentBuilder() - .SetKey("namespace", "email3") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 3") 
- .AddStringProperty("personQualifiedId", - R"(pkg$db/name\#space\\\\#person3)") // escaped - .SetCreationTimestampMs(kDefaultCreationTimestampMs) - .SetScore(1) - .Build(); - - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(person3).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); - ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk()); - - // Parent SearchSpec - SearchSpecProto search_spec; - search_spec.set_term_match_type(TermMatchType::PREFIX); - search_spec.set_query("firstName:first"); - - // JoinSpec - JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); - join_spec->set_parent_property_expression( - std::string(JoinProcessor::kQualifiedIdExpr)); - join_spec->set_child_property_expression("personQualifiedId"); - join_spec->set_aggregation_scoring_strategy( - JoinSpecProto::AggregationScoringStrategy::MAX); - JoinSpecProto::NestedSpecProto* nested_spec = - join_spec->mutable_nested_spec(); - SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); - nested_search_spec->set_term_match_type(TermMatchType::PREFIX); - nested_search_spec->set_query("subject:test"); - *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); - *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - - // Parent ScoringSpec - ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); - - // Parent ResultSpec - ResultSpecProto result_spec; - result_spec.set_num_per_page(1); - - // Since we: - // - Use MAX for aggregation scoring strategy. - // - (Default) use DOCUMENT_SCORE to score child documents. 
- // - (Default) use DESC as the ranking order. - // - // person1 + email1 should have the highest aggregated score (3) and be - // returned first. person2 + email2 (aggregated score = 2) should be the - // second, and person3 + email3 (aggregated score = 1) should be the last. - SearchResultProto expected_result1; - expected_result1.mutable_status()->set_code(StatusProto::OK); - SearchResultProto::ResultProto* result_proto1 = - expected_result1.mutable_results()->Add(); - *result_proto1->mutable_document() = person1; - *result_proto1->mutable_joined_results()->Add()->mutable_document() = email1; - - SearchResultProto expected_result2; - expected_result2.mutable_status()->set_code(StatusProto::OK); - SearchResultProto::ResultProto* result_proto2 = - expected_result2.mutable_results()->Add(); - *result_proto2->mutable_document() = person2; - *result_proto2->mutable_joined_results()->Add()->mutable_document() = email2; - - SearchResultProto expected_result3; - expected_result3.mutable_status()->set_code(StatusProto::OK); - SearchResultProto::ResultProto* result_proto3 = - expected_result3.mutable_results()->Add(); - *result_proto3->mutable_document() = person3; - *result_proto3->mutable_joined_results()->Add()->mutable_document() = email3; - - SearchResultProto result1 = - icing.Search(search_spec, scoring_spec, result_spec); - uint64_t next_page_token = result1.next_page_token(); - EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); - expected_result1.set_next_page_token(next_page_token); - EXPECT_THAT(result1, - EqualsSearchResultIgnoreStatsAndScores(expected_result1)); - - SearchResultProto result2 = icing.GetNextPage(next_page_token); - next_page_token = result2.next_page_token(); - EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); - expected_result2.set_next_page_token(next_page_token); - EXPECT_THAT(result2, - EqualsSearchResultIgnoreStatsAndScores(expected_result2)); - - SearchResultProto result3 = icing.GetNextPage(next_page_token); - 
next_page_token = result3.next_page_token(); - EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken)); - EXPECT_THAT(result3, - EqualsSearchResultIgnoreStatsAndScores(expected_result3)); -} - -TEST_F(IcingSearchEngineTest, NumericFilterAdvancedQuerySucceeds) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Create the schema and document store - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("transaction") - .AddProperty(PropertyConfigBuilder() - .SetName("price") - .SetDataTypeInt64(NUMERIC_MATCH_RANGE) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("cost") - .SetDataTypeInt64(NUMERIC_MATCH_RANGE) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document_one = DocumentBuilder() - .SetKey("namespace", "1") - .SetSchema("transaction") - .SetCreationTimestampMs(1) - .AddInt64Property("price", 10) - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = DocumentBuilder() - .SetKey("namespace", "2") - .SetSchema("transaction") - .SetCreationTimestampMs(1) - .AddInt64Property("price", 25) - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - DocumentProto document_three = DocumentBuilder() - .SetKey("namespace", "3") - .SetSchema("transaction") - .SetCreationTimestampMs(1) - .AddInt64Property("cost", 2) - .Build(); - ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("price < 20"); - search_spec.set_search_type( - SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); - search_spec.add_enabled_features(std::string(kNumericSearchFeature)); - - SearchResultProto results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - 
ResultSpecProto::default_instance()); - ASSERT_THAT(results.results(), SizeIs(1)); - EXPECT_THAT(results.results(0).document(), EqualsProto(document_one)); - - search_spec.set_query("price == 25"); - results = icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - ASSERT_THAT(results.results(), SizeIs(1)); - EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); - - search_spec.set_query("cost > 2"); - results = icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - EXPECT_THAT(results.results(), IsEmpty()); - - search_spec.set_query("cost >= 2"); - results = icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - ASSERT_THAT(results.results(), SizeIs(1)); - EXPECT_THAT(results.results(0).document(), EqualsProto(document_three)); - - search_spec.set_query("price <= 25"); - results = icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - ASSERT_THAT(results.results(), SizeIs(2)); - EXPECT_THAT(results.results(0).document(), EqualsProto(document_two)); - EXPECT_THAT(results.results(1).document(), EqualsProto(document_one)); -} - -TEST_F(IcingSearchEngineTest, NumericFilterOldQueryFails) { - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); - ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); - - // Create the schema and document store - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("transaction") - .AddProperty(PropertyConfigBuilder() - .SetName("price") - .SetDataTypeInt64(NUMERIC_MATCH_RANGE) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("cost") - .SetDataTypeInt64(NUMERIC_MATCH_RANGE) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); - - DocumentProto document_one = 
DocumentBuilder() - .SetKey("namespace", "1") - .SetSchema("transaction") - .SetCreationTimestampMs(1) - .AddInt64Property("price", 10) - .Build(); - ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); - - DocumentProto document_two = DocumentBuilder() - .SetKey("namespace", "2") - .SetSchema("transaction") - .SetCreationTimestampMs(1) - .AddInt64Property("price", 25) - .Build(); - ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); - - DocumentProto document_three = DocumentBuilder() - .SetKey("namespace", "3") - .SetSchema("transaction") - .SetCreationTimestampMs(1) - .AddInt64Property("cost", 2) - .Build(); - ASSERT_THAT(icing.Put(document_three).status(), ProtoIsOk()); - - SearchSpecProto search_spec; - search_spec.set_query("price < 20"); - search_spec.set_search_type(SearchSpecProto::SearchType::ICING_RAW_QUERY); - search_spec.add_enabled_features(std::string(kNumericSearchFeature)); - - SearchResultProto results = - icing.Search(search_spec, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); - EXPECT_THAT(results.status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - } // namespace } // namespace lib } // namespace icing diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 6123f47..6608e44 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -70,6 +70,8 @@ namespace lib { namespace { +using ::testing::IsTrue; + // Creates a fake type config with 10 properties (p0 - p9) void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) { type_config->set_schema_type("Fake_Type"); @@ -79,7 +81,7 @@ void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) { property->set_property_name( IcingStringUtil::StringPrintf("p%d", i)); // p0 - p9 property->set_data_type(PropertyConfigProto::DataType::STRING); - property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED); + 
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); property->mutable_string_indexing_config()->set_term_match_type( TermMatchType::EXACT_ONLY); property->mutable_string_indexing_config()->set_tokenizer_type( @@ -151,10 +153,14 @@ std::unique_ptr CreateNormalizer() { .ValueOrDie(); } -std::unique_ptr CreateSchemaStore(const Clock* clock) { - Filesystem filesystem; +std::unique_ptr CreateSchemaStore(const Filesystem& filesystem, + const Clock* clock, + const std::string& base_dir) { + std::string schema_store_dir = base_dir + "/schema_store_test"; + filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()); + std::unique_ptr schema_store = - SchemaStore::Create(&filesystem, GetTestTempDir(), clock).ValueOrDie(); + SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie(); SchemaProto schema; CreateFakeTypeConfig(schema.add_types()); @@ -167,8 +173,8 @@ std::unique_ptr CreateSchemaStore(const Clock* clock) { return schema_store; } -void CleanUp(const Filesystem& filesystem, const std::string& index_dir) { - filesystem.DeleteDirectoryRecursively(index_dir.c_str()); +void CleanUp(const Filesystem& filesystem, const std::string& base_dir) { + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); } void BM_IndexDocumentWithOneProperty(benchmark::State& state) { @@ -180,20 +186,26 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) { IcingFilesystem icing_filesystem; Filesystem filesystem; - std::string index_dir = GetTestTempDir() + "/index_test/"; + std::string base_dir = GetTestTempDir() + "/index_processor_benchmark"; + std::string index_dir = base_dir + "/index_test/"; + std::string integer_index_dir = base_dir + "/integer_index_test/"; - CleanUp(filesystem, index_dir); + CleanUp(filesystem, base_dir); + ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()), + IsTrue()); std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); - std::unique_ptr> integer_index = - 
std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> integer_index, + DummyNumericIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); std::unique_ptr normalizer = CreateNormalizer(); Clock clock; - std::unique_ptr schema_store = CreateSchemaStore(&clock); + std::unique_ptr schema_store = + CreateSchemaStore(filesystem, &clock, base_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index_processor, IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), @@ -210,7 +222,14 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) { index_processor->IndexDocument(tokenized_document, document_id++)); } - CleanUp(filesystem, index_dir); + index_processor.reset(); + schema_store.reset(); + normalizer.reset(); + language_segmenter.reset(); + integer_index.reset(); + index.reset(); + + CleanUp(filesystem, base_dir); } BENCHMARK(BM_IndexDocumentWithOneProperty) ->Arg(1000) @@ -237,20 +256,26 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) { IcingFilesystem icing_filesystem; Filesystem filesystem; - std::string index_dir = GetTestTempDir() + "/index_test/"; + std::string base_dir = GetTestTempDir() + "/index_processor_benchmark"; + std::string index_dir = base_dir + "/index_test/"; + std::string integer_index_dir = base_dir + "/integer_index_test/"; - CleanUp(filesystem, index_dir); + CleanUp(filesystem, base_dir); + ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()), + IsTrue()); std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); - std::unique_ptr> integer_index = - std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> integer_index, + DummyNumericIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = 
language_segmenter_factory::Create(std::move(options)).ValueOrDie(); std::unique_ptr normalizer = CreateNormalizer(); Clock clock; - std::unique_ptr schema_store = CreateSchemaStore(&clock); + std::unique_ptr schema_store = + CreateSchemaStore(filesystem, &clock, base_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index_processor, IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), @@ -269,7 +294,14 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) { index_processor->IndexDocument(tokenized_document, document_id++)); } - CleanUp(filesystem, index_dir); + index_processor.reset(); + schema_store.reset(); + normalizer.reset(); + language_segmenter.reset(); + integer_index.reset(); + index.reset(); + + CleanUp(filesystem, base_dir); } BENCHMARK(BM_IndexDocumentWithTenProperties) ->Arg(1000) @@ -296,20 +328,26 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) { IcingFilesystem icing_filesystem; Filesystem filesystem; - std::string index_dir = GetTestTempDir() + "/index_test/"; + std::string base_dir = GetTestTempDir() + "/index_processor_benchmark"; + std::string index_dir = base_dir + "/index_test/"; + std::string integer_index_dir = base_dir + "/integer_index_test/"; - CleanUp(filesystem, index_dir); + CleanUp(filesystem, base_dir); + ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()), + IsTrue()); std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); - std::unique_ptr> integer_index = - std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> integer_index, + DummyNumericIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); std::unique_ptr normalizer = CreateNormalizer(); Clock clock; - std::unique_ptr schema_store = CreateSchemaStore(&clock); + std::unique_ptr schema_store = + 
CreateSchemaStore(filesystem, &clock, base_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index_processor, IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), @@ -328,7 +366,14 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) { index_processor->IndexDocument(tokenized_document, document_id++)); } - CleanUp(filesystem, index_dir); + index_processor.reset(); + schema_store.reset(); + normalizer.reset(); + language_segmenter.reset(); + integer_index.reset(); + index.reset(); + + CleanUp(filesystem, base_dir); } BENCHMARK(BM_IndexDocumentWithDiacriticLetters) ->Arg(1000) @@ -355,20 +400,26 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) { IcingFilesystem icing_filesystem; Filesystem filesystem; - std::string index_dir = GetTestTempDir() + "/index_test/"; + std::string base_dir = GetTestTempDir() + "/index_processor_benchmark"; + std::string index_dir = base_dir + "/index_test/"; + std::string integer_index_dir = base_dir + "/integer_index_test/"; - CleanUp(filesystem, index_dir); + CleanUp(filesystem, base_dir); + ASSERT_THAT(filesystem.CreateDirectoryRecursively(base_dir.c_str()), + IsTrue()); std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); - std::unique_ptr> integer_index = - std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> integer_index, + DummyNumericIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); std::unique_ptr normalizer = CreateNormalizer(); Clock clock; - std::unique_ptr schema_store = CreateSchemaStore(&clock); + std::unique_ptr schema_store = + CreateSchemaStore(filesystem, &clock, base_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr index_processor, IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), @@ -386,7 +437,14 @@ void 
BM_IndexDocumentWithHiragana(benchmark::State& state) { index_processor->IndexDocument(tokenized_document, document_id++)); } - CleanUp(filesystem, index_dir); + index_processor.reset(); + schema_store.reset(); + normalizer.reset(); + language_segmenter.reset(); + integer_index.reset(); + index.reset(); + + CleanUp(filesystem, base_dir); } BENCHMARK(BM_IndexDocumentWithHiragana) ->Arg(1000) diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index 04f27a7..626efa7 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -132,6 +132,7 @@ using DataType = PropertyConfigProto::DataType; using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsEmpty; +using ::testing::IsTrue; using ::testing::SizeIs; using ::testing::Test; @@ -150,12 +151,21 @@ class IndexProcessorTest : public Test { GetTestFilePath("icing/icu.dat"))); } - index_dir_ = GetTestTempDir() + "/index_test"; + base_dir_ = GetTestTempDir() + "/index_processor_test"; + ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), + IsTrue()); + + index_dir_ = base_dir_ + "/index"; + integer_index_dir_ = base_dir_ + "/integer_index"; + schema_store_dir_ = base_dir_ + "/schema_store"; + Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024); ICING_ASSERT_OK_AND_ASSIGN( index_, Index::Create(options, &filesystem_, &icing_filesystem_)); - integer_index_ = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + integer_index_, + DummyNumericIndex::Create(filesystem_, integer_index_dir_)); language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); ICING_ASSERT_OK_AND_ASSIGN( @@ -167,12 +177,11 @@ class IndexProcessorTest : public Test { normalizer_factory::Create( /*max_term_byte_size=*/std::numeric_limits::max())); - std::string schema_store_dir = GetTestTempDir() + "/schema_store"; ASSERT_TRUE( - filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str())); + 
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, - SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder() .AddType( @@ -257,7 +266,14 @@ class IndexProcessorTest : public Test { } void TearDown() override { - filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()); + index_processor_.reset(); + schema_store_.reset(); + normalizer_.reset(); + lang_segmenter_.reset(); + integer_index_.reset(); + index_.reset(); + + filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()); } std::unique_ptr mock_icing_filesystem_; @@ -265,12 +281,15 @@ class IndexProcessorTest : public Test { Filesystem filesystem_; IcingFilesystem icing_filesystem_; FakeClock fake_clock_; + std::string base_dir_; std::string index_dir_; + std::string integer_index_dir_; + std::string schema_store_dir_; - std::unique_ptr lang_segmenter_; - std::unique_ptr normalizer_; std::unique_ptr index_; std::unique_ptr> integer_index_; + std::unique_ptr lang_segmenter_; + std::unique_ptr normalizer_; std::unique_ptr schema_store_; std::unique_ptr index_processor_; }; diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 4349cc9..c7b6380 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -79,6 +79,7 @@ class IndexTest : public Test { } void TearDown() override { + index_.reset(); icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()); } diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index 0a3317c..b01f278 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -78,7 +78,7 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() { ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->GetTermId(term_)); 
ICING_ASSIGN_OR_RETURN(uint32_t term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - lite_index_->AppendHits( + lite_index_->FetchHits( term_id, section_restrict_mask_, /*only_from_prefix_sections=*/false, /*score_by=*/ @@ -105,7 +105,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() { ICING_ASSIGN_OR_RETURN( uint32_t term_id, term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE)); - lite_index_->AppendHits( + lite_index_->FetchHits( term_id, section_restrict_mask_, /*only_from_prefix_sections=*/!exact_match, /*score_by=*/ diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index 1ea945c..bf54dec 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -30,6 +30,7 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/mutex.h" #include "icing/absl_ports/str_cat.h" #include "icing/file/filesystem.h" #include "icing/index/hit/doc-hit-info.h" @@ -114,6 +115,7 @@ libtextclassifier3::Status LiteIndex::Initialize() { uint64_t file_size; IcingTimer timer; + absl_ports::unique_lock l(&mutex_); if (!lexicon_.CreateIfNotExist(options_.lexicon_options) || !lexicon_.Init()) { return absl_ports::InternalError("Failed to initialize lexicon trie"); @@ -241,6 +243,7 @@ Crc32 LiteIndex::ComputeChecksum() { libtextclassifier3::Status LiteIndex::Reset() { IcingTimer timer; + absl_ports::unique_lock l(&mutex_); // TODO(b/140436942): When these components have been changed to return errors // they should be propagated from here. 
lexicon_.Clear(); @@ -253,11 +256,13 @@ libtextclassifier3::Status LiteIndex::Reset() { } void LiteIndex::Warm() { + absl_ports::shared_lock l(&mutex_); hit_buffer_.Warm(); lexicon_.Warm(); } libtextclassifier3::Status LiteIndex::PersistToDisk() { + absl_ports::unique_lock l(&mutex_); bool success = true; if (!lexicon_.Sync()) { ICING_VLOG(1) << "Failed to sync the lexicon."; @@ -279,6 +284,7 @@ void LiteIndex::UpdateChecksum() { libtextclassifier3::StatusOr LiteIndex::InsertTerm( const std::string& term, TermMatchType::Code term_match_type, NamespaceId namespace_id) { + absl_ports::unique_lock l(&mutex_); uint32_t tvi; libtextclassifier3::Status status = lexicon_.Insert(term.c_str(), "", &tvi, false); @@ -287,13 +293,19 @@ libtextclassifier3::StatusOr LiteIndex::InsertTerm( << status.error_message(); return status; } - ICING_RETURN_IF_ERROR(UpdateTermProperties( + ICING_RETURN_IF_ERROR(UpdateTermPropertiesImpl( tvi, term_match_type == TermMatchType::PREFIX, namespace_id)); return tvi; } libtextclassifier3::Status LiteIndex::UpdateTermProperties( uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) { + absl_ports::unique_lock l(&mutex_); + return UpdateTermPropertiesImpl(tvi, hasPrefixHits, namespace_id); +} + +libtextclassifier3::Status LiteIndex::UpdateTermPropertiesImpl( + uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) { if (hasPrefixHits && !lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) { return absl_ports::ResourceExhaustedError( @@ -309,6 +321,7 @@ libtextclassifier3::Status LiteIndex::UpdateTermProperties( } libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) { + absl_ports::unique_lock l(&mutex_); if (is_full()) { return absl_ports::ResourceExhaustedError("Hit buffer is full!"); } @@ -329,6 +342,7 @@ libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) { libtextclassifier3::StatusOr LiteIndex::GetTermId( const std::string& term) const { + absl_ports::shared_lock 
l(&mutex_); char dummy; uint32_t tvi; if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) { @@ -338,7 +352,7 @@ libtextclassifier3::StatusOr LiteIndex::GetTermId( return tvi; } -int LiteIndex::AppendHits( +int LiteIndex::FetchHits( uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by, @@ -349,9 +363,27 @@ int LiteIndex::AppendHits( DocumentId last_document_id = kInvalidDocumentId; // Record whether the last document belongs to the given namespaces. bool is_last_document_desired = false; - for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) { - TermIdHitPair term_id_hit_pair( - hit_buffer_.array_cast()[idx]); + + if (NeedSort()) { + // Transition from shared_lock in NeedSort to unique_lock here is safe + // because it doesn't hurt to sort again if sorting was done already by + // another thread after NeedSort is evaluated. NeedSort is called before + // sorting to improve concurrency as threads can avoid acquiring the unique + // lock if no sorting is needed. + absl_ports::unique_lock l(&mutex_); + SortHits(); + } + + // This downgrade from an unique_lock to a shared_lock is safe because we're + // searching for the term in the searchable (sorted) section of the HitBuffer + // only in Seek(). + // Any operations that might execute in between the transition of downgrading + // the lock here are guaranteed not to alter the searchable section (or the + // LiteIndex due to a global lock in IcingSearchEngine). 
+ absl_ports::shared_lock l(&mutex_); + for (uint32_t idx = Seek(term_id); idx < header_->searchable_end(); idx++) { + TermIdHitPair term_id_hit_pair = + hit_buffer_.array_cast()[idx]; if (term_id_hit_pair.term_id() != term_id) break; const Hit& hit = term_id_hit_pair.hit(); @@ -422,7 +454,7 @@ libtextclassifier3::StatusOr LiteIndex::ScoreHits( uint32_t term_id, SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by, const SuggestionResultChecker* suggestion_result_checker) { - return AppendHits(term_id, kSectionIdMaskAll, + return FetchHits(term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false, score_by, suggestion_result_checker, /*hits_out=*/nullptr); @@ -434,6 +466,7 @@ bool LiteIndex::is_full() const { } std::string LiteIndex::GetDebugInfo(DebugInfoVerbosity::Code verbosity) { + absl_ports::unique_lock l(&mutex_); std::string res; std::string lexicon_info; lexicon_.GetDebugInfo(verbosity, &lexicon_info); @@ -468,6 +501,7 @@ libtextclassifier3::StatusOr LiteIndex::GetElementsSize() const { IndexStorageInfoProto LiteIndex::GetStorageInfo( IndexStorageInfoProto storage_info) const { + absl_ports::shared_lock l(&mutex_); int64_t header_and_hit_buffer_file_size = filesystem_->GetFileSize(hit_buffer_fd_.get()); storage_info.set_lite_index_hit_buffer_size( @@ -512,9 +546,7 @@ void LiteIndex::SortHits() { UpdateChecksum(); } -uint32_t LiteIndex::Seek(uint32_t term_id) { - SortHits(); - +uint32_t LiteIndex::Seek(uint32_t term_id) const { // Binary search for our term_id. Make sure we get the first // element. Using kBeginSortValue ensures this for the hit value. 
TermIdHitPair term_id_hit_pair( @@ -522,14 +554,21 @@ uint32_t LiteIndex::Seek(uint32_t term_id) { const TermIdHitPair::Value* array = hit_buffer_.array_cast(); + if (header_->searchable_end() != header_->cur_size()) { + ICING_LOG(WARNING) << "Lite index: hit buffer searchable end != current " + << "size during Seek(): " + << header_->searchable_end() << " vs " + << header_->cur_size(); + } const TermIdHitPair::Value* ptr = std::lower_bound( - array, array + header_->cur_size(), term_id_hit_pair.value()); + array, array + header_->searchable_end(), term_id_hit_pair.value()); return ptr - array; } libtextclassifier3::Status LiteIndex::Optimize( const std::vector& document_id_old_to_new, const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) { + absl_ports::unique_lock l(&mutex_); header_->set_last_added_docid(new_last_added_document_id); if (header_->cur_size() == 0) { return libtextclassifier3::Status::OK; diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index e4fb686..c7255fd 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -27,6 +27,8 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/mutex.h" +#include "icing/absl_ports/thread_annotations.h" #include "icing/file/filesystem.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/hit/hit.h" @@ -52,6 +54,10 @@ namespace icing { namespace lib { +// The LiteIndex is go/thread-compatible. Operations on the same data member +// object interfere with each other, unless they are guaranteed not to mutate +// the object (In the case of LiteIndex, this means all const methods, +// FetchHits and ScoreHits). class LiteIndex { public: // An entry in the hit buffer. @@ -72,25 +78,22 @@ class LiteIndex { // Resets all internal members of the index. Returns OK if all operations were // successful. 
- libtextclassifier3::Status Reset(); + libtextclassifier3::Status Reset() ICING_LOCKS_EXCLUDED(mutex_); // Advises the OS to cache pages in the index, which will be accessed for a // query soon. - void Warm(); + void Warm() ICING_LOCKS_EXCLUDED(mutex_); // Syncs all modified files in the index to disk. // // Returns: // OK on success // INTERNAL on I/O error - libtextclassifier3::Status PersistToDisk(); - - // Calculate the checksum of all sub-components of the LiteIndex - Crc32 ComputeChecksum(); + libtextclassifier3::Status PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_); // Returns term_id if term found, NOT_FOUND otherwise. libtextclassifier3::StatusOr GetTermId( - const std::string& term) const; + const std::string& term) const ICING_LOCKS_EXCLUDED(mutex_); // Returns an iterator for all terms for which 'prefix' is a prefix. class PrefixIterator { @@ -109,7 +112,11 @@ class LiteIndex { IcingDynamicTrie::Iterator delegate_; }; - PrefixIterator FindTermPrefixes(const std::string& prefix) const { + // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate any + // previously returned PrefixIterator. + PrefixIterator FindTermPrefixes(const std::string& prefix) const + ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str())); } @@ -120,7 +127,7 @@ class LiteIndex { // RESOURCE_EXHAUSTED if lexicon is full or no disk space is available libtextclassifier3::StatusOr InsertTerm( const std::string& term, TermMatchType::Code term_match_type, - NamespaceId namespace_id); + NamespaceId namespace_id) ICING_LOCKS_EXCLUDED(mutex_); // Updates term properties by setting hasPrefixHits and namespace id of the // term. 
@@ -130,7 +137,8 @@ class LiteIndex { // RESOURCE_EXHAUSTED if no disk space is available libtextclassifier3::Status UpdateTermProperties(uint32_t tvi, bool hasPrefixHits, - NamespaceId namespace_id); + NamespaceId namespace_id) + ICING_LOCKS_EXCLUDED(mutex_); // Append hit to buffer. term_id must be encoded using the same term_id_codec // supplied to the index constructor. @@ -138,7 +146,8 @@ class LiteIndex { // - OK if hit was successfully added // - RESOURCE_EXHAUSTED if hit could not be added (either due to hit buffer // or file system capacity reached). - libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit); + libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit) + ICING_LOCKS_EXCLUDED(mutex_); // Add all hits with term_id from the sections specified in section_id_mask, // skipping hits in non-prefix sections if only_from_prefix_sections is true, @@ -147,33 +156,35 @@ class LiteIndex { // is nullptr. // // Only those hits which belongs to the given namespaces will be counted and - // appended. A nullptr namespace checker will disable this check. + // fetched. A nullptr namespace checker will disable this check. // // Returns the score of hits that would be added to hits_out according the // given score_by. - int AppendHits( + int FetchHits( uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by, const SuggestionResultChecker* suggestion_result_checker, std::vector* hits_out, - std::vector* term_frequency_out = nullptr); + std::vector* term_frequency_out = nullptr) + ICING_LOCKS_EXCLUDED(mutex_); // Returns the hit count of the term. // Only those hits which belongs to the given namespaces will be counted. 
libtextclassifier3::StatusOr ScoreHits( uint32_t term_id, SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by, - const SuggestionResultChecker* suggestion_result_checker); + const SuggestionResultChecker* suggestion_result_checker) + ICING_LOCKS_EXCLUDED(mutex_); - // Check if buffer has reached its capacity. - bool is_full() const; + bool empty() const ICING_LOCKS_EXCLUDED(mutex_) { return size() == 0; } - bool empty() const { return size() == 0; } - - uint32_t size() const { return header_->cur_size(); } + uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); + return header_->cur_size(); + } - bool WantsMerge() const { + bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) { return size() >= (options_.hit_buffer_want_merge_bytes / sizeof(TermIdHitPair::Value)); } @@ -224,11 +235,13 @@ class LiteIndex { int end_position_; }; - const_iterator begin() const { + const_iterator begin() const ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); // If the LiteIndex is empty, just return end(). - return empty() ? end() - : const_iterator(hit_buffer_.array_cast(), 0, - header_->cur_size()); + return empty_impl() + ? end() + : const_iterator(hit_buffer_.array_cast(), 0, + header_->cur_size()); } const_iterator end() const { return const_iterator(); } @@ -240,19 +253,25 @@ class LiteIndex { // We keep track of the last added document_id. This is always the largest // document_id that has been added because hits can only be added in order of // increasing document_id. 
- DocumentId last_added_document_id() const { + DocumentId last_added_document_id() const ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); return header_->last_added_docid(); } - void set_last_added_document_id(DocumentId document_id) const { + void set_last_added_document_id(DocumentId document_id) + ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::unique_lock l(&mutex_); header_->set_last_added_docid(document_id); } + // WARNING: Subsequent calls to AddHit/InsertTerm may invalidate the reference + // returned here. const IcingDynamicTrie& lexicon() const { return lexicon_; } // Returns debug information for the index in out. // verbosity = BASIC, simplest debug information - size of lexicon, hit buffer // verbosity = DETAILED, more detailed debug information from the lexicon. - std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity); + std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity) + ICING_LOCKS_EXCLUDED(mutex_); // Returns the byte size of all the elements held in the index. This excludes // the size of any internal metadata of the index, e.g. the index's header. @@ -260,15 +279,16 @@ class LiteIndex { // Returns: // Byte size on success // INTERNAL_ERROR on IO error - libtextclassifier3::StatusOr GetElementsSize() const; + libtextclassifier3::StatusOr GetElementsSize() const + ICING_LOCKS_EXCLUDED(mutex_); // Takes the provided storage_info, populates the fields related to the lite // index and returns that storage_info. // // If an IO error occurs while trying to calculate the value for a field, then // that field will be set to -1. - IndexStorageInfoProto GetStorageInfo( - IndexStorageInfoProto storage_info) const; + IndexStorageInfoProto GetStorageInfo(IndexStorageInfoProto storage_info) const + ICING_LOCKS_EXCLUDED(mutex_); // Reduces internal file sizes by reclaiming space of deleted documents. // @@ -281,7 +301,8 @@ class LiteIndex { // invalid state and should be cleared. 
libtextclassifier3::Status Optimize( const std::vector& document_id_old_to_new, - const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id); + const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) + ICING_LOCKS_EXCLUDED(mutex_); private: static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions(); @@ -295,45 +316,78 @@ class LiteIndex { // OK on success // DATA_LOSS if the index was corrupted and cleared // INTERNAL on I/O error - libtextclassifier3::Status Initialize(); + libtextclassifier3::Status Initialize() ICING_LOCKS_EXCLUDED(mutex_); + + bool initialized() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { + return header_ != nullptr; + } + + // Check if the hit buffer has reached its capacity. + bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_); - bool initialized() const { return header_ != nullptr; } + // Non-locking implementation for empty(). + bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { + return size_impl() == 0; + } + + // Non-locking implementation for size(). + bool size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { + return header_->cur_size(); + } + + // Calculate the checksum of all sub-components of the LiteIndex + Crc32 ComputeChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Sets the computed checksum in the header - void UpdateChecksum(); + void UpdateChecksum() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Non-locking implementation for UpdateTermProperties. + libtextclassifier3::Status UpdateTermPropertiesImpl(uint32_t tvi, + bool hasPrefixHits, + NamespaceId namespace_id) + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Whether or not the HitBuffer requires sorting. + bool NeedSort() ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); + return header_->cur_size() - header_->searchable_end() > 0; + } // Sort hits stored in the index. 
- void SortHits(); + void SortHits() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Returns the position of the first element with term_id, or the size of the - // hit buffer if term_id is not present. - uint32_t Seek(uint32_t term_id); + // Returns the position of the first element with term_id, or the searchable + // end of the hit buffer if term_id is not present. + uint32_t Seek(uint32_t term_id) const ICING_SHARED_LOCKS_REQUIRED(mutex_); // File descriptor that points to where the header and hit buffer are written // to. - ScopedFd hit_buffer_fd_; + ScopedFd hit_buffer_fd_ ICING_GUARDED_BY(mutex_); // Mmapped region past the header that stores the hits. - IcingArrayStorage hit_buffer_; + IcingArrayStorage hit_buffer_ ICING_GUARDED_BY(mutex_); // Crc checksum of the hits, excludes the header. - uint32_t hit_buffer_crc_; + uint32_t hit_buffer_crc_ ICING_GUARDED_BY(mutex_); // Trie that maps indexed terms to their term id - IcingDynamicTrie lexicon_; + IcingDynamicTrie lexicon_ ICING_GUARDED_BY(mutex_); // TODO(b/140437260): Port over to MemoryMappedFile // Memory mapped region of the underlying file that reflects the header. - IcingMMapper header_mmap_; + IcingMMapper header_mmap_ ICING_GUARDED_BY(mutex_); // Wrapper around the mmapped header that contains stats on the lite index. - std::unique_ptr header_; + std::unique_ptr header_ ICING_GUARDED_BY(mutex_); // Options used to initialize the LiteIndex. 
const Options options_; // TODO(b/139087650) Move to icing::Filesystem const IcingFilesystem* const filesystem_; + + // Used to provide reader and writer locks + mutable absl_ports::shared_mutex mutex_; }; } // namespace lib diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc index 2c29640..c3f52b1 100644 --- a/icing/index/lite/lite-index_test.cc +++ b/icing/index/lite/lite-index_test.cc @@ -56,6 +56,8 @@ class LiteIndexTest : public testing::Test { } void TearDown() override { + term_id_codec_.reset(); + lite_index_.reset(); ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str())); } @@ -82,7 +84,7 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) { ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1)); std::vector hits1; - lite_index_->AppendHits( + lite_index_->FetchHits( foo_term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false, SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, @@ -94,7 +96,7 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) { std::vector hits2; AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker; - lite_index_->AppendHits( + lite_index_->FetchHits( foo_term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false, SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc new file mode 100644 index 0000000..7711f92 --- /dev/null +++ b/icing/index/lite/lite-index_thread-safety_test.cc @@ -0,0 +1,400 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/index/lite/doc-hit-info-iterator-term-lite.h" +#include "icing/index/lite/lite-index.h" +#include "icing/index/term-id-codec.h" +#include "icing/schema/section.h" +#include "icing/store/suggestion-result-checker.h" +#include "icing/testing/always-false-suggestion-result-checker-impl.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Le; +using ::testing::SizeIs; + +// These tests cover concurrent FetchHits operations, as well as interleaving +// AddHit and FetchHits operations. Other usages of the LiteIndex other than +// these scenarios are not guaranteed with to be thread-safe as the LiteIndex is +// go/thread-compatible. 
+class LiteIndexThreadSafetyTest : public testing::Test { + protected: + void SetUp() override { + index_dir_ = GetTestTempDir() + "/test_dir"; + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str())); + + std::string lite_index_file_name = + index_dir_ + "/test_file.lite-idx-thread-safety.index"; + LiteIndex::Options options(lite_index_file_name, + /*hit_buffer_want_merge_bytes=*/1024 * 1024); + ICING_ASSERT_OK_AND_ASSIGN(lite_index_, + LiteIndex::Create(options, &icing_filesystem_)); + + ICING_ASSERT_OK_AND_ASSIGN( + term_id_codec_, + TermIdCodec::Create( + IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()), + IcingDynamicTrie::max_value_index(options.lexicon_options))); + } + + void TearDown() override { + term_id_codec_.reset(); + lite_index_.reset(); + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str())); + } + + std::string index_dir_; + Filesystem filesystem_; + IcingFilesystem icing_filesystem_; + std::unique_ptr lite_index_; + std::unique_ptr term_id_codec_; +}; + +constexpr NamespaceId kNamespace0 = 0; +constexpr DocumentId kDocumentId0 = 0; +constexpr DocumentId kDocumentId1 = 1; +constexpr SectionId kSectionId0 = 1; +constexpr SectionId kSectionId1 = 0b11; + +static constexpr std::array kCommonWords = { + "the", "and", "for", "that", "this", "with", + "you", "not", "are", "from", "your", "all", + "have", "new", "more", "was", "will", "home", + "can", "about", "page", "has", "search", "free", + "but", "our", "one", "other", "information", "time", + "they", "site", "may", "what", "which", "their", + "news", "out", "use", "any", "there", "see", + "only", "his", "when", "contact", "here", "business", + "who", "web", "also", "now", "help", "get", + "view", "online", "first", "been", "would", "how", + "were", "services", "some", "these", "click", "its", + "like", "service", "than", "find", "price", "date", + "back", "top", "people", "had", "list", "name", + "just", "over", "state", "year", "day", "into", + 
"email", "two", "health", "world", "next", "used", + "work", "last", "most", "products", "music", "buy", + "data", "make", "them", "should"}; + +TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_singleTerm) { + // Add some hits + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t foo_tvi, + lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0)); + + ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, + term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE)); + Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0)); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1)); + + // Create kNumThreads threads to call lite_index_->FetchHits() + // simultaneously. Each thread should get a valid result of 2 hits for the + // term 'foo', and there should be no crash. + constexpr int kNumThreads = 50; + std::vector> hits(kNumThreads); + auto callable = [&](int thread_id) { + lite_index_->FetchHits( + foo_term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, + /*namespace_checker=*/nullptr, &hits[thread_id]); + }; + // Spawn threads for FetchHits(). + std::vector thread_objs; + for (int i = 0; i < kNumThreads; ++i) { + thread_objs.emplace_back(callable, /*thread_id=*/i); + } + + // Join threads and verify results + for (int i = 0; i < kNumThreads; ++i) { + thread_objs[i].join(); + EXPECT_THAT( + hits[i], + ElementsAre( + EqualsDocHitInfo(kDocumentId1, std::vector{kSectionId0}), + EqualsDocHitInfo(kDocumentId0, + std::vector{kSectionId0}))); + } +} + +TEST_F(LiteIndexThreadSafetyTest, SimultaneousFetchHits_multipleTerms) { + // Add two hits for each of the first 50 terms in kCommonWords. 
+ for (int i = 0; i < 50; ++i) { + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm(std::string(kCommonWords[i]), + TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0)); + ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1)); + } + + // Create kNumThreads threads to call lite_index_->FetchHits() + // simultaneously. Each thread should get a valid result of 2 hits for each + // term, and there should be no crash. + constexpr int kNumThreads = 50; + std::vector> hits(kNumThreads); + auto callable = [&](int thread_id) { + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm(std::string(kCommonWords[thread_id]), + TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + lite_index_->FetchHits( + term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, + /*namespace_checker=*/nullptr, &hits[thread_id]); + }; + + // Spawn threads for FetchHits(). 
+ std::vector thread_objs; + for (int i = 0; i < kNumThreads; ++i) { + thread_objs.emplace_back(callable, /*thread_id=*/i); + } + + // Join threads and verify results + for (int i = 0; i < kNumThreads; ++i) { + thread_objs[i].join(); + EXPECT_THAT( + hits[i], + ElementsAre( + EqualsDocHitInfo(kDocumentId1, std::vector{kSectionId0}), + EqualsDocHitInfo(kDocumentId0, + std::vector{kSectionId0}))); + } +} + +TEST_F(LiteIndexThreadSafetyTest, SimultaneousAddHitAndFetchHits_singleTerm) { + // Add some hits + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t foo_tvi, + lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0)); + + ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, + term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE)); + Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0)); + + // Create kNumThreads threads. Every even-numbered thread calls FetchHits and + // every odd numbered thread calls AddHit. + // Each AddHit operation adds the term 'foo' to a new section of the same doc. + // Each query result should contain one hit, and there should be no crash. + constexpr int kNumThreads = 50; + std::vector> hits(kNumThreads); + auto callable = [&](int thread_id) { + if (thread_id % 2 == 0) { + // Even-numbered thread calls FetchHits. + lite_index_->FetchHits( + foo_term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, + /*namespace_checker=*/nullptr, &hits[thread_id]); + } else { + // Odd-numbered thread calls AddHit. + Hit doc_hit(/*section_id=*/thread_id / 2, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit)); + } + }; + + // Spawn threads. 
+ std::vector thread_objs; + for (int i = 0; i < kNumThreads; ++i) { + thread_objs.emplace_back(callable, /*thread_id=*/i); + } + + // Join threads and verify results. + for (int i = 0; i < kNumThreads; ++i) { + thread_objs[i].join(); + // All AddHit operations add 'foo' to the same document, so there should + // only be one DocHitInfo per run. + if (i % 2 == 0) { + EXPECT_THAT(hits[i], SizeIs(1)); + EXPECT_THAT(hits[i].back().document_id(), Eq(0)); + } + } + + // After all threads have executed, hits should come from sections 0-24. + std::vector final_hits; + lite_index_->FetchHits( + foo_term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, + /*namespace_checker=*/nullptr, &final_hits); + EXPECT_THAT(final_hits, SizeIs(1)); + EXPECT_THAT(final_hits.back().document_id(), Eq(0)); + // Section mask of sections 0-24. + EXPECT_THAT(final_hits.back().hit_section_ids_mask(), Eq((1 << 25) - 1)); +} + +TEST_F(LiteIndexThreadSafetyTest, + SimultaneousAddHitAndFetchHits_multipleTerms) { + // Add the initial hit 'foo'. + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t foo_tvi, + lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0)); + + ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, + term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE)); + Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0)); + + // Create kNumThreads threads. Every even-numbered thread calls FetchHits and + // every odd numbered thread calls AddHit. + // Each AddHit operation adds a different term to a new doc. + // Queries always search for the term 'foo' added above so there will always + // be a hit. There should be no crash. 
+ constexpr int kNumThreads = 50; + std::vector> hits(kNumThreads); + auto callable = [&](int thread_id) { + // Create new tvi and term_id for new term kCommonWords[thread_id]. + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm(std::string(kCommonWords[thread_id]), + TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + + if (thread_id % 2 == 0) { + // Even-numbered thread calls FetchHits. + lite_index_->FetchHits( + foo_term_id, kSectionIdMaskAll, /*only_from_prefix_sections=*/false, + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, + /*namespace_checker=*/nullptr, &hits[thread_id]); + } else { + // Odd-numbered thread calls AddHit. + // AddHit to section 0 of a new doc. + Hit doc_hit(/*section_id=*/kSectionId0, /*document_id=*/thread_id / 2, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit)); + } + }; + + // Spawn threads. + std::vector thread_objs; + for (int i = 0; i < kNumThreads; ++i) { + thread_objs.emplace_back(callable, /*thread_id=*/i); + } + + // Join threads and verify results. Queries always search for the term 'foo' + // so there will always be a hit + for (int i = 0; i < kNumThreads; ++i) { + thread_objs[i].join(); + if (i % 2 == 0) { + EXPECT_THAT(hits[i], + ElementsAre(EqualsDocHitInfo( + kDocumentId0, std::vector{kSectionId0}))); + } + } +} + +TEST_F(LiteIndexThreadSafetyTest, ManyAddHitAndOneFetchHits_multipleTerms) { + // Add two hits for each of the first 20 terms in kCommonWords. 
+ for (int i = 0; i < 20; ++i) { + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm(std::string(kCommonWords[i]), + TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + Hit doc_hit1(/*section_id=*/kSectionId1, /*document_id=*/kDocumentId0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0)); + ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1)); + } + + // Create kNumThreads threads. Every fifth thread (thread_id % 5 == 0) calls + // FetchHits; all other threads call AddHit. + // Each AddHit operation adds a hit for kCommonWords[thread_id / 5] to doc 0. + // Queries search for terms that were added above so there will always + // be a hit. There should be no crash. + constexpr int kNumThreads = 100; + std::vector> hits(kNumThreads); + auto callable = [&](int thread_id) { + // Get the tvi and term_id for term kCommonWords[thread_id / 5]. + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm(std::string(kCommonWords[thread_id / 5]), + TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + + if (thread_id % 5 == 0) { + // Call FetchHits on term kCommonWords[thread_id / 5] + lite_index_->FetchHits( + term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT, + /*namespace_checker=*/nullptr, &hits[thread_id]); + } else { + // All other threads call AddHit. + // AddHit to section (thread_id % 5 + 1) of doc 0.
+ Hit doc_hit(/*section_id=*/thread_id % 5 + 1, + /*document_id=*/kDocumentId0, Hit::kDefaultTermFrequency, + /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit)); + } + }; + // Spawn threads. + std::vector thread_objs; + for (int i = 0; i < kNumThreads; ++i) { + thread_objs.emplace_back(callable, /*thread_id=*/i); + } + + // Join threads and verify FetchHits results. + // Every query should see doc 0 with hits in kSectionId0 and kSectionId1. More + // hits might be found in sections 2-5 depending on thread execution order. + for (int i = 0; i < kNumThreads; ++i) { + thread_objs[i].join(); + if (i % 5 == 0) { + EXPECT_THAT(hits[i], SizeIs(1)); + EXPECT_THAT(hits[i].back().document_id(), Eq(0)); + EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Ge(0b11)); + EXPECT_THAT(hits[i].back().hit_section_ids_mask(), Le(0b1111111)); + } + } +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc index c16c5d1..816d46e 100644 --- a/icing/index/main/main-index_test.cc +++ b/icing/index/main/main-index_test.cc @@ -100,6 +100,8 @@ class MainIndexTest : public testing::Test { } void TearDown() override { + term_id_codec_.reset(); + lite_index_.reset(); ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str())); } diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h index a1d20f8..1b7b5ae 100644 --- a/icing/index/numeric/dummy-numeric-index.h +++ b/icing/index/numeric/dummy-numeric-index.h @@ -29,6 +29,8 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/file/persistent-storage.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/hit/hit.h" #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -36,43 +38,54 @@ #include
"icing/index/numeric/numeric-index.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" +#include "icing/util/crc32.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { +// DummyNumericIndex: dummy class to help with testing and unblock e2e +// integration for numeric search. It stores all numeric index data (keys and +// hits) in memory without actual persistent storages. All PersistentStorage +// features do not work as expected, i.e. they don't persist any data into disk +// and therefore data are volatile. template class DummyNumericIndex : public NumericIndex { public: + static libtextclassifier3::StatusOr>> + Create(const Filesystem& filesystem, std::string working_path) { + auto dummy_numeric_index = std::unique_ptr>( + new DummyNumericIndex(filesystem, std::move(working_path))); + ICING_RETURN_IF_ERROR(dummy_numeric_index->InitializeNewStorage()); + return dummy_numeric_index; + } + ~DummyNumericIndex() override = default; std::unique_ptr::Editor> Edit( - std::string_view property_name, DocumentId document_id, + std::string_view property_path, DocumentId document_id, SectionId section_id) override { - return std::make_unique(property_name, document_id, section_id, + return std::make_unique(property_path, document_id, section_id, storage_); } libtextclassifier3::StatusOr> GetIterator( - std::string_view property_name, T key_lower, T key_upper) const override; + std::string_view property_path, T key_lower, T key_upper) const override; libtextclassifier3::Status Reset() override { storage_.clear(); return libtextclassifier3::Status::OK; } - libtextclassifier3::Status PersistToDisk() override { - return libtextclassifier3::Status::OK; - } - private: class Editor : public NumericIndex::Editor { public: explicit Editor( - std::string_view property_name, DocumentId document_id, + std::string_view property_path, DocumentId document_id, SectionId section_id, std::unordered_map>>& storage) - : 
NumericIndex::Editor(property_name, document_id, section_id), + : NumericIndex::Editor(property_path, document_id, section_id), storage_(storage) {} ~Editor() override = default; @@ -147,20 +160,46 @@ class DummyNumericIndex : public NumericIndex { DocHitInfo doc_hit_info_; }; + private: + explicit DummyNumericIndex(const Filesystem& filesystem, + std::string&& working_path) + : NumericIndex(filesystem, std::move(working_path), + PersistentStorage::WorkingPathType::kDummy) {} + + libtextclassifier3::Status PersistStoragesToDisk() override { + return libtextclassifier3::Status::OK; + } + + libtextclassifier3::Status PersistMetadataToDisk() override { + return libtextclassifier3::Status::OK; + } + + libtextclassifier3::StatusOr ComputeInfoChecksum() override { + return Crc32(0); + } + + libtextclassifier3::StatusOr ComputeStoragesChecksum() override { + return Crc32(0); + } + + PersistentStorage::Crcs& crcs() override { return dummy_crcs_; } + const PersistentStorage::Crcs& crcs() const override { return dummy_crcs_; } + std::unordered_map>> storage_; + PersistentStorage::Crcs dummy_crcs_; }; template libtextclassifier3::Status DummyNumericIndex::Editor::IndexAllBufferedKeys() { - auto property_map_iter = storage_.find(this->property_name_); + auto property_map_iter = storage_.find(this->property_path_); if (property_map_iter == storage_.end()) { const auto& [inserted_iter, insert_result] = - storage_.insert({this->property_name_, {}}); + storage_.insert({this->property_path_, {}}); if (!insert_result) { return absl_ports::InternalError( absl_ports::StrCat("Failed to create a new map for property \"", - this->property_name_, "\"")); + this->property_path_, "\"")); } property_map_iter = inserted_iter; } @@ -207,17 +246,17 @@ libtextclassifier3::Status DummyNumericIndex::Iterator::Advance() { template libtextclassifier3::StatusOr> -DummyNumericIndex::GetIterator(std::string_view property_name, T key_lower, +DummyNumericIndex::GetIterator(std::string_view property_path, 
T key_lower, T key_upper) const { if (key_lower > key_upper) { return absl_ports::InvalidArgumentError( "key_lower should not be greater than key_upper"); } - auto property_map_iter = storage_.find(std::string(property_name)); + auto property_map_iter = storage_.find(std::string(property_path)); if (property_map_iter == storage_.end()) { return absl_ports::NotFoundError( - absl_ports::StrCat("Property \"", property_name, "\" not found")); + absl_ports::StrCat("Property \"", property_path, "\" not found")); } std::vector bucket_info_vec; diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc index fa8fa3e..0233b38 100644 --- a/icing/index/numeric/integer-index-storage.cc +++ b/icing/index/numeric/integer-index-storage.cc @@ -48,102 +48,26 @@ namespace lib { namespace { -// Helper function to PWrite crcs and info to metadata_file_path. -libtextclassifier3::Status WriteMetadata( - const Filesystem& filesystem, const std::string& metadata_file_path, - const IntegerIndexStorage::Crcs* crcs, - const IntegerIndexStorage::Info* info) { - ScopedFd sfd(filesystem.OpenForWrite(metadata_file_path.c_str())); - if (!sfd.is_valid()) { - return absl_ports::InternalError("Failed to create metadata file"); - } - - // Write crcs and info. File layout: - ICING_RETURN_IF_ERROR(crcs->Serialize(filesystem, sfd.get())); - ICING_RETURN_IF_ERROR(info->Serialize(filesystem, sfd.get())); - - return libtextclassifier3::Status::OK; -} - -// Helper function to update checksums from info and storages to a Crcs -// instance. 
-libtextclassifier3::Status UpdateChecksums( - IntegerIndexStorage::Crcs* crcs, IntegerIndexStorage::Info* info, - FileBackedVector* sorted_buckets, - FileBackedVector* unsorted_buckets, - FlashIndexStorage* flash_index_storage) { - // Compute crcs - ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc, - sorted_buckets->ComputeChecksum()); - ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc, - unsorted_buckets->ComputeChecksum()); - - crcs->component_crcs.info_crc = info->ComputeChecksum().Get(); - crcs->component_crcs.sorted_buckets_crc = sorted_buckets_crc.Get(); - crcs->component_crcs.unsorted_buckets_crc = unsorted_buckets_crc.Get(); - // TODO(b/259744228): implement and update flash_index_storage checksum - crcs->component_crcs.flash_index_storage_crc = 0; - crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get(); - - return libtextclassifier3::Status::OK; -} - -// Helper function to validate checksums. -libtextclassifier3::Status ValidateChecksums( - const IntegerIndexStorage::Crcs* crcs, - const IntegerIndexStorage::Info* info, - FileBackedVector* sorted_buckets, - FileBackedVector* unsorted_buckets, - FlashIndexStorage* flash_index_storage) { - if (crcs->all_crc != crcs->component_crcs.ComputeChecksum().Get()) { - return absl_ports::FailedPreconditionError( - "Invalid all crc for IntegerIndexStorage"); - } - - if (crcs->component_crcs.info_crc != info->ComputeChecksum().Get()) { - return absl_ports::FailedPreconditionError( - "Invalid info crc for IntegerIndexStorage"); - } - - ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc, - sorted_buckets->ComputeChecksum()); - if (crcs->component_crcs.sorted_buckets_crc != sorted_buckets_crc.Get()) { - return absl_ports::FailedPreconditionError( - "Mismatch crc with IntegerIndexStorage sorted buckets"); - } - - ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc, - unsorted_buckets->ComputeChecksum()); - if (crcs->component_crcs.unsorted_buckets_crc != unsorted_buckets_crc.Get()) { - return 
absl_ports::FailedPreconditionError( - "Mismatch crc with IntegerIndexStorage unsorted buckets"); - } - - // TODO(b/259744228): implement and verify flash_index_storage checksum - - return libtextclassifier3::Status::OK; -} - // The following 4 methods are helper functions to get the correct file path of // metadata/sorted_buckets/unsorted_buckets/flash_index_storage, according to // the given working directory. -std::string GetMetadataFilePath(std::string_view working_dir) { - return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix, +std::string GetMetadataFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix, ".m"); } -std::string GetSortedBucketsFilePath(std::string_view working_dir) { - return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix, +std::string GetSortedBucketsFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix, ".s"); } -std::string GetUnsortedBucketsFilePath(std::string_view working_dir) { - return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix, +std::string GetUnsortedBucketsFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix, ".u"); } -std::string GetFlashIndexStorageFilePath(std::string_view working_dir) { - return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix, +std::string GetFlashIndexStorageFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", IntegerIndexStorage::kFilePrefix, ".f"); } @@ -358,7 +282,7 @@ bool IntegerIndexStorage::Options::IsValid() const { return false; } std::sort(buckets.begin(), buckets.end()); - int64_t expected_lower = std::numeric_limits::min(); + int64_t prev_upper = std::numeric_limits::min(); for (int i = 0; i < buckets.size(); ++i) { // key_lower should not be greater than key_upper and 
init bucket should // have invalid posting list identifier. @@ -367,46 +291,46 @@ bool IntegerIndexStorage::Options::IsValid() const { return false; } - if (buckets[i].key_lower() != expected_lower) { + // Previous upper bound should not be INT64_MAX since it is not the last + // bucket. + if (prev_upper == std::numeric_limits::max()) { return false; } - // If it is the last bucket, then key_upper should be INT64_MAX. Otherwise - // it should not be INT64_MAX. Use XOR for this logic. - if ((buckets[i].key_upper() == std::numeric_limits::max()) ^ - (i == buckets.size() - 1)) { + int64_t expected_lower = + (i == 0 ? std::numeric_limits::min() : prev_upper + 1); + if (buckets[i].key_lower() != expected_lower) { return false; } - expected_lower = buckets[i].key_upper() + 1; + + prev_upper = buckets[i].key_upper(); } - return true; + return prev_upper == std::numeric_limits::max(); } /* static */ libtextclassifier3::StatusOr> IntegerIndexStorage::Create( - const Filesystem& filesystem, std::string_view base_dir, Options options, + const Filesystem& filesystem, std::string working_path, Options options, PostingListIntegerIndexSerializer* posting_list_serializer) { if (!options.IsValid()) { return absl_ports::InvalidArgumentError( "Invalid IntegerIndexStorage options"); } - std::string working_dir = absl_ports::StrCat(base_dir, "/", kSubDirectory); - if (!filesystem.FileExists(GetMetadataFilePath(working_dir).c_str()) || - !filesystem.FileExists(GetSortedBucketsFilePath(working_dir).c_str()) || - !filesystem.FileExists(GetUnsortedBucketsFilePath(working_dir).c_str()) || + if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) || + !filesystem.FileExists(GetSortedBucketsFilePath(working_path).c_str()) || !filesystem.FileExists( - GetFlashIndexStorageFilePath(working_dir).c_str())) { - // Delete working_dir if any of them is missing, and reinitialize. 
- if (!filesystem.DeleteDirectoryRecursively(working_dir.c_str())) { - return absl_ports::InternalError( - absl_ports::StrCat("Failed to delete directory: ", working_dir)); - } - return InitializeNewFiles(filesystem, std::move(working_dir), + GetUnsortedBucketsFilePath(working_path).c_str()) || + !filesystem.FileExists( + GetFlashIndexStorageFilePath(working_path).c_str())) { + // Discard working_path if any of them is missing, and reinitialize. + ICING_RETURN_IF_ERROR( + PersistentStorage::Discard(filesystem, working_path, kWorkingPathType)); + return InitializeNewFiles(filesystem, std::move(working_path), std::move(options), posting_list_serializer); } - return InitializeExistingFiles(filesystem, std::move(working_dir), + return InitializeExistingFiles(filesystem, std::move(working_path), std::move(options), posting_list_serializer); } @@ -414,7 +338,7 @@ IntegerIndexStorage::~IntegerIndexStorage() { if (!PersistToDisk().ok()) { ICING_LOG(WARNING) << "Failed to persist hash map to disk while destructing " - << working_dir_; + << working_path_; } } @@ -645,33 +569,15 @@ IntegerIndexStorage::GetIterator(int64_t query_key_lower, query_key_lower, query_key_upper, std::move(bucket_pl_iters))); } -libtextclassifier3::Status IntegerIndexStorage::PersistToDisk() { - ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk()); - ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk()); - if (!flash_index_storage_->PersistToDisk()) { - return absl_ports::InternalError( - "Fail to persist FlashIndexStorage to disk"); - } - - ICING_RETURN_IF_ERROR(UpdateChecksums(crcs(), info(), sorted_buckets_.get(), - unsorted_buckets_.get(), - flash_index_storage_.get())); - // Changes should have been applied to the underlying file when using - // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an - // extra safety step to ensure they are written out. 
- ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk()); - - return libtextclassifier3::Status::OK; -} - /* static */ libtextclassifier3::StatusOr> IntegerIndexStorage::InitializeNewFiles( - const Filesystem& filesystem, std::string&& working_dir, Options&& options, + const Filesystem& filesystem, std::string&& working_path, Options&& options, PostingListIntegerIndexSerializer* posting_list_serializer) { + // IntegerIndexStorage uses working_path as working directory path. // Create working directory. - if (!filesystem.CreateDirectoryRecursively(working_dir.c_str())) { + if (!filesystem.CreateDirectory(working_path.c_str())) { return absl_ports::InternalError( - absl_ports::StrCat("Failed to create directory: ", working_dir)); + absl_ports::StrCat("Failed to create directory: ", working_path)); } // TODO(b/259743562): [Optimization 1] decide max # buckets, unsorted buckets @@ -683,7 +589,7 @@ IntegerIndexStorage::InitializeNewFiles( ICING_ASSIGN_OR_RETURN( std::unique_ptr> sorted_buckets, FileBackedVector::Create( - filesystem, GetSortedBucketsFilePath(working_dir), + filesystem, GetSortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, pre_mapping_mmap_size)); @@ -694,14 +600,14 @@ IntegerIndexStorage::InitializeNewFiles( ICING_ASSIGN_OR_RETURN( std::unique_ptr> unsorted_buckets, FileBackedVector::Create( - filesystem, GetUnsortedBucketsFilePath(working_dir), + filesystem, GetUnsortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, pre_mapping_mmap_size)); // Initialize flash_index_storage ICING_ASSIGN_OR_RETURN( FlashIndexStorage flash_index_storage, - FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_dir), + FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path), &filesystem, posting_list_serializer)); if (options.HasCustomInitBuckets()) { @@ -736,47 +642,45 @@ IntegerIndexStorage::InitializeNewFiles( } 
ICING_RETURN_IF_ERROR(sorted_buckets->PersistToDisk()); - // Create and initialize new info - Info new_info; - new_info.magic = Info::kMagic; - new_info.num_keys = 0; - - // Compute checksums - Crcs new_crcs; - ICING_RETURN_IF_ERROR( - UpdateChecksums(&new_crcs, &new_info, sorted_buckets.get(), - unsorted_buckets.get(), &flash_index_storage)); - - const std::string metadata_file_path = GetMetadataFilePath(working_dir); - // Write new metadata file - ICING_RETURN_IF_ERROR( - WriteMetadata(filesystem, metadata_file_path, &new_crcs, &new_info)); - - // Mmap the content of the crcs and info. + // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and + // call GrowAndRemapIfNecessary to grow the underlying file. ICING_ASSIGN_OR_RETURN( MemoryMappedFile metadata_mmapped_file, - MemoryMappedFile::Create(filesystem, metadata_file_path, + MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, /*max_file_size=*/kMetadataFileSize, /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/kMetadataFileSize)); - - return std::unique_ptr(new IntegerIndexStorage( - filesystem, std::move(working_dir), std::move(options), - posting_list_serializer, - std::make_unique(std::move(metadata_mmapped_file)), - std::move(sorted_buckets), std::move(unsorted_buckets), - std::make_unique(std::move(flash_index_storage)))); + ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary( + /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize)); + + // Create instance. + auto new_integer_index_storage = + std::unique_ptr(new IntegerIndexStorage( + filesystem, std::move(working_path), std::move(options), + posting_list_serializer, + std::make_unique(std::move(metadata_mmapped_file)), + std::move(sorted_buckets), std::move(unsorted_buckets), + std::make_unique(std::move(flash_index_storage)))); + // Initialize info content by writing mapped memory directly. 
+ Info& info_ref = new_integer_index_storage->info(); + info_ref.magic = Info::kMagic; + info_ref.num_keys = 0; + // Initialize new PersistentStorage. The initial checksums will be computed + // and set via InitializeNewStorage. + ICING_RETURN_IF_ERROR(new_integer_index_storage->InitializeNewStorage()); + + return new_integer_index_storage; } /* static */ libtextclassifier3::StatusOr> IntegerIndexStorage::InitializeExistingFiles( - const Filesystem& filesystem, std::string&& working_dir, Options&& options, + const Filesystem& filesystem, std::string&& working_path, Options&& options, PostingListIntegerIndexSerializer* posting_list_serializer) { // Mmap the content of the crcs and info. ICING_ASSIGN_OR_RETURN( MemoryMappedFile metadata_mmapped_file, - MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_dir), + MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, /*max_file_size=*/kMetadataFileSize, /*pre_mapping_file_offset=*/0, @@ -791,7 +695,7 @@ IntegerIndexStorage::InitializeExistingFiles( ICING_ASSIGN_OR_RETURN( std::unique_ptr> sorted_buckets, FileBackedVector::Create( - filesystem, GetSortedBucketsFilePath(working_dir), + filesystem, GetSortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, pre_mapping_mmap_size)); @@ -802,31 +706,67 @@ IntegerIndexStorage::InitializeExistingFiles( ICING_ASSIGN_OR_RETURN( std::unique_ptr> unsorted_buckets, FileBackedVector::Create( - filesystem, GetUnsortedBucketsFilePath(working_dir), + filesystem, GetUnsortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, pre_mapping_mmap_size)); // Initialize flash_index_storage ICING_ASSIGN_OR_RETURN( FlashIndexStorage flash_index_storage, - FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_dir), + FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path), &filesystem, 
posting_list_serializer)); - Crcs* crcs_ptr = reinterpret_cast( - metadata_mmapped_file.mutable_region() + Crcs::kFileOffset); - Info* info_ptr = reinterpret_cast( - metadata_mmapped_file.mutable_region() + Info::kFileOffset); - // Validate checksums of info and 3 storages. - ICING_RETURN_IF_ERROR( - ValidateChecksums(crcs_ptr, info_ptr, sorted_buckets.get(), - unsorted_buckets.get(), &flash_index_storage)); - - return std::unique_ptr(new IntegerIndexStorage( - filesystem, std::move(working_dir), std::move(options), - posting_list_serializer, - std::make_unique(std::move(metadata_mmapped_file)), - std::move(sorted_buckets), std::move(unsorted_buckets), - std::make_unique(std::move(flash_index_storage)))); + // Create instance. + auto integer_index_storage = + std::unique_ptr(new IntegerIndexStorage( + filesystem, std::move(working_path), std::move(options), + posting_list_serializer, + std::make_unique(std::move(metadata_mmapped_file)), + std::move(sorted_buckets), std::move(unsorted_buckets), + std::make_unique(std::move(flash_index_storage)))); + // Initialize existing PersistentStorage. Checksums will be validated. + ICING_RETURN_IF_ERROR(integer_index_storage->InitializeExistingStorage()); + + // Validate other values of info and options. + // Magic should be consistent with the codebase. 
+ if (integer_index_storage->info().magic != Info::kMagic) { + return absl_ports::FailedPreconditionError("Incorrect magic value"); + } + + return integer_index_storage; +} + +libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk() { + ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk()); + ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk()); + if (!flash_index_storage_->PersistToDisk()) { + return absl_ports::InternalError( + "Fail to persist FlashIndexStorage to disk"); + } + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status IntegerIndexStorage::PersistMetadataToDisk() { + // Changes should have been applied to the underlying file when using + // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an + // extra safety step to ensure they are written out. + return metadata_mmapped_file_->PersistToDisk(); +} + +libtextclassifier3::StatusOr IntegerIndexStorage::ComputeInfoChecksum() { + return info().ComputeChecksum(); +} + +libtextclassifier3::StatusOr +IntegerIndexStorage::ComputeStoragesChecksum() { + // Compute crcs + ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc, + sorted_buckets_->ComputeChecksum()); + ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc, + unsorted_buckets_->ComputeChecksum()); + + // TODO(b/259744228): implement and include flash_index_storage checksum + return Crc32(sorted_buckets_crc.Get() ^ unsorted_buckets_crc.Get()); } libtextclassifier3::StatusOr> diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h index 562060b..bef8282 100644 --- a/icing/index/numeric/integer-index-storage.h +++ b/icing/index/numeric/integer-index-storage.h @@ -26,6 +26,7 @@ #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" +#include "icing/file/persistent-storage.h" #include "icing/file/posting_list/flash-index-storage.h" #include "icing/file/posting_list/posting-list-identifier.h" 
#include "icing/index/iterator/doc-hit-info-iterator.h" @@ -70,66 +71,14 @@ namespace lib { // choose sorted/unsorted bucket array. // - Then we do binary search on the sorted bucket array and sequential search // on the unsorted bucket array. -class IntegerIndexStorage { +class IntegerIndexStorage : public PersistentStorage { public: - // Crcs and Info will be written into the metadata file. - // File layout: - // Crcs - struct Crcs { - static constexpr int32_t kFileOffset = 0; - - struct ComponentCrcs { - uint32_t info_crc; - uint32_t sorted_buckets_crc; - uint32_t unsorted_buckets_crc; - uint32_t flash_index_storage_crc; - - bool operator==(const ComponentCrcs& other) const { - return info_crc == other.info_crc && - sorted_buckets_crc == other.sorted_buckets_crc && - unsorted_buckets_crc == other.unsorted_buckets_crc && - flash_index_storage_crc == other.flash_index_storage_crc; - } - - Crc32 ComputeChecksum() const { - return Crc32(std::string_view(reinterpret_cast(this), - sizeof(ComponentCrcs))); - } - } __attribute__((packed)); - - libtextclassifier3::Status Serialize(const Filesystem& filesystem, - int fd) const { - if (!filesystem.PWrite(fd, kFileOffset, this, sizeof(*this))) { - return absl_ports::InternalError("Failed to write crcs into file"); - } - return libtextclassifier3::Status::OK; - } - - bool operator==(const Crcs& other) const { - return all_crc == other.all_crc && component_crcs == other.component_crcs; - } - - uint32_t all_crc; - ComponentCrcs component_crcs; - } __attribute__((packed)); - static_assert(sizeof(Crcs) == 20, ""); - - // Info struct Info { - static constexpr int32_t kFileOffset = static_cast(sizeof(Crcs)); static constexpr int32_t kMagic = 0xc4bf0ccc; int32_t magic; int32_t num_keys; - libtextclassifier3::Status Serialize(const Filesystem& filesystem, - int fd) const { - if (!filesystem.PWrite(fd, kFileOffset, this, sizeof(*this))) { - return absl_ports::InternalError("Failed to write info into file"); - } - return 
libtextclassifier3::Status::OK; - } - Crc32 ComputeChecksum() const { return Crc32( std::string_view(reinterpret_cast(this), sizeof(Info))); @@ -137,9 +86,6 @@ class IntegerIndexStorage { } __attribute__((packed)); static_assert(sizeof(Info) == 8, ""); - static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info); - static_assert(kMetadataFileSize == 28); - // Bucket class Bucket { public: @@ -219,19 +165,34 @@ class IntegerIndexStorage { std::vector custom_init_unsorted_buckets; }; - static constexpr std::string_view kSubDirectory = "storage_dir"; + // Metadata file layout: Crcs (at offset 0) immediately followed by Info. + static constexpr int32_t kCrcsMetadataFileOffset = 0; + static constexpr int32_t kInfoMetadataFileOffset = + static_cast(sizeof(Crcs)); + static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info); + static_assert(kMetadataFileSize == 20, ""); + + static constexpr WorkingPathType kWorkingPathType = + WorkingPathType::kDirectory; static constexpr std::string_view kFilePrefix = "integer_index_storage"; - // Creates a new IntegerIndexStorage instance to index integers. For directory - // management purpose, we define working_dir as "/storage_dir", and - // all underlying files will be stored under it. If any of the underlying file - // is missing, then delete the whole working_dir and (re)initialize with new - // ones. Otherwise initialize and create the instance by existing files. + // Creates a new IntegerIndexStorage instance to index integers (for a single + // property). If any of the underlying files is missing, then delete the whole + // working_path and (re)initialize with new ones. Otherwise initialize and + // create the instance by existing files. // // filesystem: Object to make system level calls - // base_dir: Specifies the base directory for all integer index data related - // files to be stored. As mentioned above, all files will be stored - // under working_dir (which is "/storage_dir"). 
+ // working_path: Specifies the working path for PersistentStorage. + // IntegerIndexStorage uses working path as working directory + // and all related files will be stored under this directory. It + // takes full ownership of working_path_, including + // creation/deletion. It is the caller's responsibility to + // specify correct working path and avoid mixing different + // persistent storages together under the same path. Also the + // caller has the ownership for the parent directory of + // working_path_, and it is responsible for parent directory + // creation/deletion. See PersistentStorage for more details + // about the concept of working_path. // options: Options instance. // posting_list_serializer: a PostingListIntegerIndexSerializer instance to // serialize/deserialize integer index data to/from @@ -244,10 +205,21 @@ class IntegerIndexStorage { // - INTERNAL_ERROR on I/O errors. // - Any FileBackedVector/FlashIndexStorage errors. static libtextclassifier3::StatusOr> - Create(const Filesystem& filesystem, std::string_view base_dir, + Create(const Filesystem& filesystem, std::string working_path, Options options, PostingListIntegerIndexSerializer* posting_list_serializer); + // Deletes IntegerIndexStorage under working_path. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + static libtextclassifier3::Status Discard(const Filesystem& filesystem, + const std::string& working_path) { + return PersistentStorage::Discard(filesystem, working_path, + kWorkingPathType); + } + // Delete copy and move constructor/assignment operator. 
IntegerIndexStorage(const IntegerIndexStorage&) = delete; IntegerIndexStorage& operator=(const IntegerIndexStorage&) = delete; @@ -255,7 +227,7 @@ class IntegerIndexStorage { IntegerIndexStorage(IntegerIndexStorage&&) = delete; IntegerIndexStorage& operator=(IntegerIndexStorage&&) = delete; - ~IntegerIndexStorage(); + ~IntegerIndexStorage() override; // Batch adds new keys (of the same DocumentId and SectionId) into the integer // index storage. @@ -286,36 +258,17 @@ class IntegerIndexStorage { libtextclassifier3::StatusOr> GetIterator( int64_t query_key_lower, int64_t query_key_upper) const; - // Flushes content to underlying files. - // - // Returns: - // - OK on success - // - INTERNAL_ERROR on I/O error - libtextclassifier3::Status PersistToDisk(); - private: - static libtextclassifier3::StatusOr> - InitializeNewFiles( - const Filesystem& filesystem, std::string&& working_dir, - Options&& options, - PostingListIntegerIndexSerializer* posting_list_serializer); - - static libtextclassifier3::StatusOr> - InitializeExistingFiles( - const Filesystem& filesystem, std::string&& working_dir, - Options&& options, - PostingListIntegerIndexSerializer* posting_list_serializer); - explicit IntegerIndexStorage( - const Filesystem& filesystem, std::string&& working_dir, + const Filesystem& filesystem, std::string&& working_path, Options&& options, PostingListIntegerIndexSerializer* posting_list_serializer, std::unique_ptr metadata_mmapped_file, std::unique_ptr> sorted_buckets, std::unique_ptr> unsorted_buckets, std::unique_ptr flash_index_storage) - : filesystem_(filesystem), - working_dir_(std::move(working_dir)), + : PersistentStorage(filesystem, std::move(working_path), + kWorkingPathType), options_(std::move(options)), posting_list_serializer_(posting_list_serializer), metadata_mmapped_file_(std::move(metadata_mmapped_file)), @@ -323,6 +276,46 @@ class IntegerIndexStorage { unsorted_buckets_(std::move(unsorted_buckets)), 
flash_index_storage_(std::move(flash_index_storage)) {} + static libtextclassifier3::StatusOr> + InitializeNewFiles( + const Filesystem& filesystem, std::string&& working_path, + Options&& options, + PostingListIntegerIndexSerializer* posting_list_serializer); + + static libtextclassifier3::StatusOr> + InitializeExistingFiles( + const Filesystem& filesystem, std::string&& working_path, + Options&& options, + PostingListIntegerIndexSerializer* posting_list_serializer); + + // Flushes contents of all storages to underlying files. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status PersistStoragesToDisk() override; + + // Flushes contents of metadata file. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status PersistMetadataToDisk() override; + + // Computes and returns Info checksum. + // + // Returns: + // - Crc of the Info on success + libtextclassifier3::StatusOr ComputeInfoChecksum() override; + + // Computes and returns all storages checksum. Checksums of sorted_buckets_ + // and unsorted_buckets_ are combined by XOR (flash_index_storage_ is TODO). + // + // Returns: + // - Crc of all storages on success + // - INTERNAL_ERROR if any data inconsistency + libtextclassifier3::StatusOr ComputeStoragesChecksum() override; + // Helper function to add keys in range [it_start, it_end) into the given // bucket. It handles the bucket and its corresponding posting list(s) to make // searching and indexing efficient. 
@@ -352,23 +345,25 @@ class IntegerIndexStorage { const std::vector::const_iterator& it_end, FileBackedVector::MutableView& mutable_bucket); - Crcs* crcs() { - return reinterpret_cast(metadata_mmapped_file_->mutable_region() + - Crcs::kFileOffset); + Crcs& crcs() override { + return *reinterpret_cast(metadata_mmapped_file_->mutable_region() + + kCrcsMetadataFileOffset); } - Info* info() { - return reinterpret_cast(metadata_mmapped_file_->mutable_region() + - Info::kFileOffset); + const Crcs& crcs() const override { + return *reinterpret_cast(metadata_mmapped_file_->region() + + kCrcsMetadataFileOffset); } - const Info* info() const { - return reinterpret_cast(metadata_mmapped_file_->region() + - Info::kFileOffset); + Info& info() { + return *reinterpret_cast(metadata_mmapped_file_->mutable_region() + + kInfoMetadataFileOffset); } - const Filesystem& filesystem_; - std::string working_dir_; + const Info& info() const { + return *reinterpret_cast(metadata_mmapped_file_->region() + + kInfoMetadataFileOffset); + } Options options_; diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc index 0afc96b..92fb912 100644 --- a/icing/index/numeric/integer-index-storage_test.cc +++ b/icing/index/numeric/integer-index-storage_test.cc @@ -25,6 +25,8 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/file/file-backed-vector.h" +#include "icing/file/persistent-storage.h" #include "icing/file/posting_list/posting-list-identifier.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -48,9 +50,10 @@ using ::testing::IsEmpty; using ::testing::IsFalse; using ::testing::IsTrue; using ::testing::Ne; +using ::testing::Not; using Bucket = IntegerIndexStorage::Bucket; -using Crcs = IntegerIndexStorage::Crcs; +using Crcs = PersistentStorage::Crcs; using Info = IntegerIndexStorage::Info; using Options = 
IntegerIndexStorage::Options; @@ -61,7 +64,11 @@ static constexpr SectionId kDefaultSectionId = 31; class IntegerIndexStorageTest : public ::testing::Test { protected: void SetUp() override { - base_dir_ = GetTestTempDir() + "/integer_index_storage_test"; + base_dir_ = GetTestTempDir() + "/icing"; + ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), + IsTrue()); + + working_path_ = base_dir_ + "/integer_index_storage_test"; serializer_ = std::make_unique(); } @@ -73,6 +80,7 @@ class IntegerIndexStorageTest : public ::testing::Test { Filesystem filesystem_; std::string base_dir_; + std::string working_path_; std::unique_ptr serializer_; }; @@ -191,9 +199,10 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) { IsFalse()); } -TEST_F(IntegerIndexStorageTest, InvalidBaseDir) { - EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, "/dev/null", Options(), - serializer_.get()), +TEST_F(IntegerIndexStorageTest, InvalidWorkingPath) { + EXPECT_THAT(IntegerIndexStorage::Create( + filesystem_, "/dev/null/integer_index_storage_test", + Options(), serializer_.get()), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } @@ -205,7 +214,7 @@ TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) { Bucket(std::numeric_limits::min(), -100)}); ASSERT_THAT(invalid_options.IsValid(), IsFalse()); - EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, base_dir_, + EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_, invalid_options, serializer_.get()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } @@ -213,10 +222,10 @@ TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) { TEST_F(IntegerIndexStorageTest, InitializeNewFiles) { { // Create new integer index storage - ASSERT_FALSE(filesystem_.DirectoryExists(base_dir_.c_str())); + ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, 
base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); ICING_ASSERT_OK(storage->PersistToDisk()); @@ -224,29 +233,25 @@ TEST_F(IntegerIndexStorageTest, InitializeNewFiles) { // Metadata file should be initialized correctly for both info and crcs // sections. - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory, - "/", IntegerIndexStorage::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m"); ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); ASSERT_TRUE(metadata_sfd.is_valid()); // Check info section Info info; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info), - Info::kFileOffset)); + IntegerIndexStorage::kInfoMetadataFileOffset)); EXPECT_THAT(info.magic, Eq(Info::kMagic)); EXPECT_THAT(info.num_keys, Eq(0)); // Check crcs section Crcs crcs; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + IntegerIndexStorage::kCrcsMetadataFileOffset)); // # of elements in sorted_buckets should be 1, so it should have non-zero - // crc value. - EXPECT_THAT(crcs.component_crcs.sorted_buckets_crc, Ne(0)); - // Other empty file backed vectors should have 0 crc value. - EXPECT_THAT(crcs.component_crcs.unsorted_buckets_crc, Eq(0)); - EXPECT_THAT(crcs.component_crcs.flash_index_storage_crc, Eq(0)); + // all storages crc value. 
+ EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0)); EXPECT_THAT(crcs.component_crcs.info_crc, Eq(Crc32(std::string_view(reinterpret_cast(&info), sizeof(Info))) @@ -263,7 +268,7 @@ TEST_F(IntegerIndexStorageTest, // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); // Insert some data. @@ -276,7 +281,7 @@ TEST_F(IntegerIndexStorageTest, // Without calling PersistToDisk, checksums will not be recomputed or synced // to disk, so initializing another instance on the same files should fail. - EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } @@ -285,7 +290,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage1, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); // Insert some data. 
@@ -308,7 +313,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage2, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); EXPECT_THAT( Query(storage2.get(), /*key_lower=*/std::numeric_limits::min(), @@ -323,7 +328,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); ICING_ASSERT_OK_AND_ASSIGN( @@ -340,7 +345,7 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) { // we should be able to get the same contents. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); EXPECT_THAT( Query(storage.get(), /*key_lower=*/std::numeric_limits::min(), @@ -356,7 +361,7 @@ TEST_F(IntegerIndexStorageTest, // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -364,32 +369,32 @@ TEST_F(IntegerIndexStorageTest, ICING_ASSERT_OK(storage->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory, - "/", IntegerIndexStorage::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m"); ScopedFd 
metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); ASSERT_TRUE(metadata_sfd.is_valid()); Crcs crcs; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + IntegerIndexStorage::kCrcsMetadataFileOffset)); // Manually corrupt all_crc crcs.all_crc += kCorruptedValueOffset; - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); + ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), + IntegerIndexStorage::kCrcsMetadataFileOffset, + &crcs, sizeof(Crcs))); metadata_sfd.reset(); { // Attempt to create the integer index storage with metadata containing // corrupted all_crc. This should fail. libtextclassifier3::StatusOr> - storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_, + storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(storage_or.status().error_message(), - HasSubstr("Invalid all crc for IntegerIndexStorage")); + HasSubstr("Invalid all crc")); } } @@ -399,7 +404,7 @@ TEST_F(IntegerIndexStorageTest, // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -407,41 +412,41 @@ TEST_F(IntegerIndexStorageTest, ICING_ASSERT_OK(storage->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory, - "/", IntegerIndexStorage::kFilePrefix, ".m"); + const std::string metadata_file_path = absl_ports::StrCat( + working_path_, "/", IntegerIndexStorage::kFilePrefix, ".m"); ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); 
ASSERT_TRUE(metadata_sfd.is_valid()); Info info; ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info), - Info::kFileOffset)); + IntegerIndexStorage::kInfoMetadataFileOffset)); // Modify info, but don't update the checksum. This would be similar to // corruption of info. info.num_keys += kCorruptedValueOffset; - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Info::kFileOffset, &info, - sizeof(Info))); + ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), + IntegerIndexStorage::kInfoMetadataFileOffset, + &info, sizeof(Info))); { // Attempt to create the integer index storage with info that doesn't match // its checksum and confirm that it fails. libtextclassifier3::StatusOr> - storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_, + storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(storage_or.status().error_message(), - HasSubstr("Invalid info crc for IntegerIndexStorage")); + HasSubstr("Invalid info crc")); } } TEST_F(IntegerIndexStorageTest, - InitializeExistingFilesWithWrongSortedBucketsCrcShouldFail) { + InitializeExistingFilesWithCorruptedSortedBucketsShouldFail) { { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -449,42 +454,45 @@ TEST_F(IntegerIndexStorageTest, ICING_ASSERT_OK(storage->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory, - "/", IntegerIndexStorage::kFilePrefix, ".m"); - ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); - ASSERT_TRUE(metadata_sfd.is_valid()); - - 
Crcs crcs; - ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + { + // Corrupt sorted buckets manually. + const std::string sorted_buckets_file_path = absl_ports::StrCat( + working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> sorted_buckets, + FileBackedVector::Create( + filesystem_, sorted_buckets_file_path, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, + sorted_buckets->ComputeChecksum()); + ICING_ASSERT_OK(sorted_buckets->Append(Bucket( + /*key_lower=*/0, /*key_upper=*/std::numeric_limits::max()))); + ICING_ASSERT_OK(sorted_buckets->PersistToDisk()); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, + sorted_buckets->ComputeChecksum()); + ASSERT_THAT(old_crc, Not(Eq(new_crc))); + } - // Manually corrupt sorted_buckets_crc - crcs.component_crcs.sorted_buckets_crc += kCorruptedValueOffset; - crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get(); - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); { // Attempt to create the integer index storage with metadata containing // corrupted sorted_buckets_crc. This should fail. 
libtextclassifier3::StatusOr> - storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_, + storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - storage_or.status().error_message(), - HasSubstr("Mismatch crc with IntegerIndexStorage sorted buckets")); + EXPECT_THAT(storage_or.status().error_message(), + HasSubstr("Invalid storages crc")); } } TEST_F(IntegerIndexStorageTest, - InitializeExistingFilesWithWrongUnsortedBucketsCrcShouldFail) { + InitializeExistingFilesWithCorruptedUnsortedBucketsShouldFail) { { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -492,42 +500,47 @@ TEST_F(IntegerIndexStorageTest, ICING_ASSERT_OK(storage->PersistToDisk()); } - const std::string metadata_file_path = - absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory, - "/", IntegerIndexStorage::kFilePrefix, ".m"); - ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str())); - ASSERT_TRUE(metadata_sfd.is_valid()); - - Crcs crcs; - ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs), - Crcs::kFileOffset)); + { + // Corrupt unsorted buckets manually. 
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat( + working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u"); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr> unsorted_buckets, + FileBackedVector::Create( + filesystem_, unsorted_buckets_file_path, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/sizeof(Bucket) * 100 + + FileBackedVector::Header::kHeaderSize)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, + unsorted_buckets->ComputeChecksum()); + ICING_ASSERT_OK(unsorted_buckets->Append(Bucket( + /*key_lower=*/0, /*key_upper=*/std::numeric_limits::max()))); + ICING_ASSERT_OK(unsorted_buckets->PersistToDisk()); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, + unsorted_buckets->ComputeChecksum()); + ASSERT_THAT(old_crc, Not(Eq(new_crc))); + } - // Manually corrupt unsorted_buckets_crc - crcs.component_crcs.unsorted_buckets_crc += kCorruptedValueOffset; - crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get(); - ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs, - sizeof(Crcs))); { // Attempt to create the integer index storage with metadata containing // corrupted unsorted_buckets_crc. This should fail. 
libtextclassifier3::StatusOr> - storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_, + storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - storage_or.status().error_message(), - HasSubstr("Mismatch crc with IntegerIndexStorage unsorted buckets")); + EXPECT_THAT(storage_or.status().error_message(), + HasSubstr("Invalid storages crc")); } } -// TODO(b/259744228): add test for corrupted flash_index_storage_crc +// TODO(b/259744228): add test for corrupted flash_index_storage TEST_F(IntegerIndexStorageTest, InvalidQuery) { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, - IntegerIndexStorage::Create(filesystem_, base_dir_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, Options(), serializer_.get())); EXPECT_THAT( storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1), @@ -546,7 +559,7 @@ TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -599,7 +612,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -658,7 +671,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); 
@@ -699,7 +712,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -722,7 +735,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -775,7 +788,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -831,7 +844,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -881,7 +894,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -931,7 +944,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), 
std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -1018,7 +1031,7 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -1059,7 +1072,7 @@ TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); @@ -1093,7 +1106,7 @@ TEST_F(IntegerIndexStorageTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr storage, IntegerIndexStorage::Create( - filesystem_, base_dir_, + filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), std::move(custom_init_unsorted_buckets)), serializer_.get())); diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc new file mode 100644 index 0000000..4de437e --- /dev/null +++ b/icing/index/numeric/integer-index.cc @@ -0,0 +1,242 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/index/numeric/integer-index.h" + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/index/numeric/integer-index-storage.h" +#include "icing/index/numeric/posting-list-integer-index-serializer.h" +#include "icing/store/document-id.h" +#include "icing/util/crc32.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +namespace { + +// Helper function to get the file name of metadata. +std::string GetMetadataFileName() { + return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m"); +} + +// Helper function to get the file path of metadata according to the given +// working directory. +std::string GetMetadataFilePath(std::string_view working_path) { + return absl_ports::StrCat(working_path, "/", GetMetadataFileName()); +} + +// Helper function to get the sub working (directory) path of +// IntegerIndexStorage according to the given working directory and property +// path. +std::string GetPropertyIndexStoragePath(std::string_view working_path, + std::string_view property_path) { + return absl_ports::StrCat(working_path, "/", property_path); +} + +// Helper function to get all existing property paths by listing all +// directories. 
+libtextclassifier3::StatusOr> +GetAllExistingPropertyPaths(const Filesystem& filesystem, + const std::string& working_path) { + std::vector property_paths; + if (!filesystem.ListDirectory(working_path.c_str(), + /*exclude=*/{GetMetadataFileName()}, + /*recursive=*/false, &property_paths)) { + return absl_ports::InternalError("Failed to list directory"); + } + return property_paths; +} + +libtextclassifier3::StatusOr +GetPropertyIntegerIndexStorageMap( + const Filesystem& filesystem, const std::string& working_path, + PostingListIntegerIndexSerializer* posting_list_serializer) { + ICING_ASSIGN_OR_RETURN(std::vector property_paths, + GetAllExistingPropertyPaths(filesystem, working_path)); + + IntegerIndex::PropertyToStorageMapType property_to_storage_map; + for (const std::string& property_path : property_paths) { + std::string storage_working_path = + GetPropertyIndexStoragePath(working_path, property_path); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr storage, + IntegerIndexStorage::Create(filesystem, storage_working_path, + IntegerIndexStorage::Options(), + posting_list_serializer)); + property_to_storage_map.insert( + std::make_pair(property_path, std::move(storage))); + } + + return property_to_storage_map; +} + +} // namespace + +/* static */ libtextclassifier3::StatusOr> +IntegerIndex::Create(const Filesystem& filesystem, std::string working_path) { + if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) { + // Discard working_path if metadata file is missing, and reinitialize. 
+ ICING_RETURN_IF_ERROR(Discard(filesystem, working_path, kWorkingPathType)); + return InitializeNewFiles(filesystem, std::move(working_path)); + } + return InitializeExistingFiles(filesystem, std::move(working_path)); +} + +IntegerIndex::~IntegerIndex() { + if (!PersistToDisk().ok()) { + ICING_LOG(WARNING) + << "Failed to persist integer index to disk while destructing " + << working_path_; + } +} + +libtextclassifier3::Status IntegerIndex::Reset() { + // Step 1: clear property_to_storage_map_. + property_to_storage_map_.clear(); + + // Step 2: delete all IntegerIndexStorages. It is safe because there is no + // active IntegerIndexStorage after clearing the map. + ICING_ASSIGN_OR_RETURN( + std::vector property_paths, + GetAllExistingPropertyPaths(filesystem_, working_path_)); + for (const std::string& property_path : property_paths) { + ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard( + filesystem_, + GetPropertyIndexStoragePath(working_path_, property_path))); + } + + info()->last_added_document_id = kInvalidDocumentId; + return libtextclassifier3::Status::OK; +} + +/* static */ libtextclassifier3::StatusOr> +IntegerIndex::InitializeNewFiles(const Filesystem& filesystem, + std::string&& working_path) { + // Create working directory. + if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to create directory: ", working_path)); + } + + // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and + // call GrowAndRemapIfNecessary to grow the underlying file. 
+ ICING_ASSIGN_OR_RETURN( + MemoryMappedFile metadata_mmapped_file, + MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/kMetadataFileSize, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/kMetadataFileSize)); + ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary( + /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize)); + + // Create instance. + auto new_integer_index = std::unique_ptr(new IntegerIndex( + filesystem, std::move(working_path), + std::make_unique(), + std::make_unique(std::move(metadata_mmapped_file)), + /*property_to_storage_map=*/{})); + // Initialize info content by writing mapped memory directly. + Info* info_ptr = new_integer_index->info(); + info_ptr->magic = Info::kMagic; + info_ptr->last_added_document_id = kInvalidDocumentId; + // Initialize new PersistentStorage. The initial checksums will be computed + // and set via InitializeNewStorage. + ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage()); + + return new_integer_index; +} + +/* static */ libtextclassifier3::StatusOr> +IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem, + std::string&& working_path) { + // Mmap the content of the crcs and info. + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile metadata_mmapped_file, + MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/kMetadataFileSize, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/kMetadataFileSize)); + + auto posting_list_serializer = + std::make_unique(); + + // Initialize all existing integer index storages. + ICING_ASSIGN_OR_RETURN( + PropertyToStorageMapType property_to_storage_map, + GetPropertyIntegerIndexStorageMap(filesystem, working_path, + posting_list_serializer.get())); + + // Create instance. 
+ auto integer_index = std::unique_ptr(new IntegerIndex( + filesystem, std::move(working_path), std::move(posting_list_serializer), + std::make_unique(std::move(metadata_mmapped_file)), + std::move(property_to_storage_map))); + // Initialize existing PersistentStorage. Checksums will be validated. + ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage()); + + // Validate magic. + if (integer_index->info()->magic != Info::kMagic) { + return absl_ports::FailedPreconditionError("Incorrect magic value"); + } + + return integer_index; +} + +libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() { + for (auto& [_, storage] : property_to_storage_map_) { + ICING_RETURN_IF_ERROR(storage->PersistToDisk()); + } + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk() { + // Changes should have been applied to the underlying file when using + // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an + // extra safety step to ensure they are written out. + return metadata_mmapped_file_->PersistToDisk(); +} + +libtextclassifier3::StatusOr IntegerIndex::ComputeInfoChecksum() { + return info()->ComputeChecksum(); +} + +libtextclassifier3::StatusOr IntegerIndex::ComputeStoragesChecksum() { + // XOR all crcs of all storages. Since XOR is commutative and associative, the + // order doesn't matter. 
+ uint32_t storages_checksum = 0; + for (auto& [property_path, storage] : property_to_storage_map_) { + ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums()); + storage_crc.Append(property_path); + + storages_checksum ^= storage_crc.Get(); + } + return Crc32(storages_checksum); +} + +} // namespace lib +} // namespace icing diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h new file mode 100644 index 0000000..a00d339 --- /dev/null +++ b/icing/index/numeric/integer-index.h @@ -0,0 +1,190 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_H_ +#define ICING_INDEX_NUMERIC_INTEGER_INDEX_H_ + +#include +#include +#include +#include +#include + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/index/numeric/integer-index-storage.h" +#include "icing/index/numeric/numeric-index.h" +#include "icing/index/numeric/posting-list-integer-index-serializer.h" +#include "icing/store/document-id.h" +#include "icing/util/crc32.h" + +namespace icing { +namespace lib { + +// IntegerIndex: a wrapper class for managing IntegerIndexStorage (a lower level +// persistent storage class for indexing and searching contents of integer type +// sections in documents) instances for different property paths. +// We separate indexable integer data from different properties into different +// storages, and IntegerIndex manages and handles indexable integer data +// appropriately to their corresponding IntegerIndexStorage instance according +// to the given property path. 
+class IntegerIndex : public NumericIndex { + public: + using PropertyToStorageMapType = + std::unordered_map>; + + struct Info { + static constexpr int32_t kMagic = 0x238a3dcb; + + int32_t magic; + DocumentId last_added_document_id; + + Crc32 ComputeChecksum() const { + return Crc32( + std::string_view(reinterpret_cast(this), sizeof(Info))); + } + } __attribute__((packed)); + static_assert(sizeof(Info) == 8, ""); + + // Metadata file layout: + static constexpr int32_t kCrcsMetadataFileOffset = 0; + static constexpr int32_t kInfoMetadataFileOffset = + static_cast(sizeof(Crcs)); + static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info); + static_assert(kMetadataFileSize == 20, ""); + + static constexpr WorkingPathType kWorkingPathType = + WorkingPathType::kDirectory; + static constexpr std::string_view kFilePrefix = "integer_index"; + + // Creates a new IntegerIndex instance to index integers. If the metadata + // file is missing, then delete the whole working_path and (re)initialize + // with new files. Otherwise initialize and create the instance from the + // existing files. + // + // filesystem: Object to make system level calls + // working_path: Specifies the working path for PersistentStorage. + // IntegerIndex uses working path as working directory and all + // related files will be stored under this directory. See + // PersistentStorage for more details about the concept of + // working_path. + // + // Returns: + // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored + // checksum. + // - INTERNAL_ERROR on I/O errors. + // - Any FileBackedVector/MemoryMappedFile errors. + static libtextclassifier3::StatusOr> Create( + const Filesystem& filesystem, std::string working_path); + + ~IntegerIndex() override; + + // TODO(b/249829533): implement these functions and add comments.
+ std::unique_ptr::Editor> Edit( + std::string_view property_path, DocumentId document_id, + SectionId section_id) override; + + libtextclassifier3::StatusOr> GetIterator( + std::string_view property_path, int64_t key_lower, + int64_t key_upper) const override; + + // Clears all integer index data. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status Reset() override; + + private: + explicit IntegerIndex(const Filesystem& filesystem, + std::string&& working_path, + std::unique_ptr + posting_list_serializer, + std::unique_ptr metadata_mmapped_file, + PropertyToStorageMapType&& property_to_storage_map) + : NumericIndex(filesystem, std::move(working_path), + kWorkingPathType), + posting_list_serializer_(std::move(posting_list_serializer)), + metadata_mmapped_file_(std::move(metadata_mmapped_file)), + property_to_storage_map_(std::move(property_to_storage_map)) {} + + static libtextclassifier3::StatusOr> + InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path); + + static libtextclassifier3::StatusOr> + InitializeExistingFiles(const Filesystem& filesystem, + std::string&& working_path); + + // Flushes contents of all storages to underlying files. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status PersistStoragesToDisk() override; + + // Flushes contents of metadata file. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + libtextclassifier3::Status PersistMetadataToDisk() override; + + // Computes and returns Info checksum. + // + // Returns: + // - Crc of the Info on success + libtextclassifier3::StatusOr ComputeInfoChecksum() override; + + // Computes and returns all storages checksum. Checksums of (storage_crc,
+ // property_path) for all existing property paths will be combined by XOR. + // + // Returns: + // - Crc of all storages on success + // - INTERNAL_ERROR if any data inconsistency + libtextclassifier3::StatusOr ComputeStoragesChecksum() override; + + Crcs& crcs() override { + return *reinterpret_cast(metadata_mmapped_file_->mutable_region() + + kCrcsMetadataFileOffset); + } + + const Crcs& crcs() const override { + return *reinterpret_cast(metadata_mmapped_file_->region() + + kCrcsMetadataFileOffset); + } + + Info* info() { + return reinterpret_cast(metadata_mmapped_file_->mutable_region() + + kInfoMetadataFileOffset); + } + + const Info* info() const { + return reinterpret_cast(metadata_mmapped_file_->region() + + kInfoMetadataFileOffset); + } + + std::unique_ptr posting_list_serializer_; + + std::unique_ptr metadata_mmapped_file_; + + // Property path to integer index storage map. + PropertyToStorageMapType property_to_storage_map_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_INDEX_NUMERIC_INTEGER_INDEX_H_ diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h index 6798f8d..a9d65d4 100644 --- a/icing/index/numeric/numeric-index.h +++ b/icing/index/numeric/numeric-index.h @@ -21,6 +21,7 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/persistent-storage.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -29,7 +30,7 @@ namespace icing { namespace lib { template -class NumericIndex { +class NumericIndex : public PersistentStorage { public: using value_type = T; @@ -46,9 +47,9 @@ class NumericIndex { // add these records into numeric index.
class Editor { public: - explicit Editor(std::string_view property_name, DocumentId document_id, + explicit Editor(std::string_view property_path, DocumentId document_id, SectionId section_id) - : property_name_(property_name), + : property_path_(property_path), document_id_(document_id), section_id_(section_id) {} @@ -69,7 +70,7 @@ class NumericIndex { virtual libtextclassifier3::Status IndexAllBufferedKeys() = 0; protected: - std::string property_name_; + std::string property_path_; DocumentId document_id_; SectionId section_id_; }; @@ -106,7 +107,7 @@ class NumericIndex { // Returns an Editor instance for adding new records into numeric index for a // given property, DocumentId and SectionId. See Editor for more details. - virtual std::unique_ptr Edit(std::string_view property_name, + virtual std::unique_ptr Edit(std::string_view property_path, DocumentId document_id, SectionId section_id) = 0; @@ -121,23 +122,36 @@ class NumericIndex { // // Returns: // - std::unique_ptr on success - // - NOT_FOUND_ERROR if there is no numeric index for property_name + // - NOT_FOUND_ERROR if there is no numeric index for property_path // - INVALID_ARGUMENT_ERROR if key_lower > key_upper // - Any other errors, depending on the actual implementation virtual libtextclassifier3::StatusOr> - GetIterator(std::string_view property_name, T key_lower, + GetIterator(std::string_view property_path, T key_lower, T key_upper) const = 0; // Clears all files created by the index. Returns OK if all files were // cleared. virtual libtextclassifier3::Status Reset() = 0; - // Syncs all the data and metadata changes to disk. 
- // - // Returns: - // OK on success - // INTERNAL_ERROR on I/O errors - virtual libtextclassifier3::Status PersistToDisk() = 0; + protected: + explicit NumericIndex(const Filesystem& filesystem, + std::string&& working_path, + PersistentStorage::WorkingPathType working_path_type) + : PersistentStorage(filesystem, std::move(working_path), + working_path_type) {} + + virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0; + + virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0; + + virtual libtextclassifier3::StatusOr ComputeInfoChecksum() + override = 0; + + virtual libtextclassifier3::StatusOr ComputeStoragesChecksum() + override = 0; + + virtual Crcs& crcs() override = 0; + virtual const Crcs& crcs() const override = 0; }; } // namespace lib diff --git a/icing/index/numeric/numeric-index_test.cc b/icing/index/numeric/numeric-index_test.cc index 38769f6..d4ff963 100644 --- a/icing/index/numeric/numeric-index_test.cc +++ b/icing/index/numeric/numeric-index_test.cc @@ -23,12 +23,14 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/file/filesystem.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/dummy-numeric-index.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" namespace icing { namespace lib { @@ -37,6 +39,7 @@ namespace { using ::testing::ElementsAre; using ::testing::IsEmpty; +using ::testing::IsTrue; using ::testing::NotNull; constexpr static std::string_view kDefaultTestPropertyName = "test"; @@ -49,16 +52,29 @@ class NumericIndexTest : public ::testing::Test { using INDEX_IMPL_TYPE = T; void SetUp() override { + base_dir_ = GetTestTempDir() + "/icing"; + ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), + IsTrue()); + + working_path_ 
= base_dir_ + "/numeric_index_integer_test"; + if (std::is_same_v< INDEX_IMPL_TYPE, DummyNumericIndex>) { - numeric_index_ = std::make_unique< - DummyNumericIndex>(); + ICING_ASSERT_OK_AND_ASSIGN( + numeric_index_, + DummyNumericIndex::Create( + filesystem_, working_path_)); } ASSERT_THAT(numeric_index_, NotNull()); } + void TearDown() override { + numeric_index_.reset(); + filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()); + } + void Index(std::string_view property_name, DocumentId document_id, SectionId section_id, std::vector keys) { @@ -86,6 +102,9 @@ class NumericIndexTest : public ::testing::Test { return result; } + Filesystem filesystem_; + std::string base_dir_; + std::string working_path_; std::unique_ptr> numeric_index_; }; diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc index 9b1db7e..9a5e299 100644 --- a/icing/index/string-section-indexing-handler.cc +++ b/icing/index/string-section-indexing-handler.cc @@ -114,7 +114,7 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle( // merge. 
if ((status.ok() || absl_ports::IsResourceExhausted(status)) && index_.WantsMerge()) { - ICING_LOG(ERROR) << "Merging the index at docid " << document_id << "."; + ICING_LOG(INFO) << "Merging the index at docid " << document_id << "."; std::unique_ptr merge_timer = clock_.GetNewTimer(); libtextclassifier3::Status merge_status = index_.Merge(); diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index 51f3106..f2a33e0 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -17,7 +17,6 @@ #include #include -#include #include "icing/icing-search-engine.h" #include "icing/jni/jni-cache.h" #include "icing/jni/scoped-primitive-array-critical.h" @@ -33,6 +32,7 @@ #include "icing/proto/usage.pb.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" +#include namespace { @@ -46,8 +46,8 @@ bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes, return protobuf->ParseFromArray(scoped_array.data(), scoped_array.size()); } -jbyteArray SerializeProtoToJniByteArray( - JNIEnv* env, const google::protobuf::MessageLite& protobuf) { +jbyteArray SerializeProtoToJniByteArray(JNIEnv* env, + const google::protobuf::MessageLite& protobuf) { int size = protobuf.ByteSizeLong(); jbyteArray ret = env->NewByteArray(size); if (ret == nullptr) { diff --git a/icing/join/join-children-fetcher.cc b/icing/join/join-children-fetcher.cc new file mode 100644 index 0000000..c6d1b97 --- /dev/null +++ b/icing/join/join-children-fetcher.cc @@ -0,0 +1,39 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/join/join-children-fetcher.h" + +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" + +namespace icing { +namespace lib { + +libtextclassifier3::StatusOr> +JoinChildrenFetcher::GetChildren(DocumentId parent_doc_id) const { + if (join_spec_.parent_property_expression() == kQualifiedIdExpr) { + if (auto iter = map_joinable_qualified_id_.find(parent_doc_id); + iter != map_joinable_qualified_id_.end()) { + return iter->second; + } + return std::vector(); + } + // TODO(b/256022027): So far we only support kQualifiedIdExpr for + // parent_property_expression, we could support more. + return absl_ports::UnimplementedError(absl_ports::StrCat( + "Parent property expression must be ", kQualifiedIdExpr)); +} + +} // namespace lib +} // namespace icing diff --git a/icing/join/join-children-fetcher.h b/icing/join/join-children-fetcher.h new file mode 100644 index 0000000..5f799b8 --- /dev/null +++ b/icing/join/join-children-fetcher.h @@ -0,0 +1,73 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_JOIN_JOIN_CHILDREN_FETCHER_H_ +#define ICING_JOIN_JOIN_CHILDREN_FETCHER_H_ + +#include +#include + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/proto/search.pb.h" +#include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" + +namespace icing { +namespace lib { + +// A class that provides the GetChildren method for joins to fetch all children +// documents given a parent document id. +// +// Internally, the class maintains a map for each joinable value type that +// groups children according to the joinable values. Currently we only support +// QUALIFIED_ID joining, in which the joinable value type is document id. +class JoinChildrenFetcher { + public: + explicit JoinChildrenFetcher( + const JoinSpecProto& join_spec, + std::unordered_map>&& + map_joinable_qualified_id) + : join_spec_(join_spec), + map_joinable_qualified_id_(std::move(map_joinable_qualified_id)) {} + + // Get a vector of children ScoredDocumentHit by parent document id. + // + // TODO(b/256022027): Implement property value joins with types of string and + // int. In these cases, GetChildren should look up joinable cache to fetch + // joinable property value of the given parent_doc_id according to + // join_spec_.parent_property_expression, and then fetch children by the + // corresponding map in this class using the joinable property value. + // + // Returns: + // The vector of results on success. + // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not + // supported. + libtextclassifier3::StatusOr> GetChildren( + DocumentId parent_doc_id) const; + + private: + static constexpr std::string_view kQualifiedIdExpr = "this.qualifiedId()"; + + const JoinSpecProto& join_spec_; // Does not own! + + // The map that groups children by qualified id used to support QualifiedId + // joining. 
The joining type is document id. + std::unordered_map> + map_joinable_qualified_id_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_JOIN_JOIN_CHILDREN_FETCHER_H_ diff --git a/icing/join/join-children-fetcher_test.cc b/icing/join/join-children-fetcher_test.cc new file mode 100644 index 0000000..75e9a14 --- /dev/null +++ b/icing/join/join-children-fetcher_test.cc @@ -0,0 +1,82 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and + +#include "icing/join/join-children-fetcher.h" + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/join/join-processor.h" +#include "icing/proto/search.pb.h" +#include "icing/schema/section.h" +#include "icing/testing/common-matchers.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::ElementsAre; +using ::testing::IsEmpty; + +TEST(JoinChildrenFetcherTest, FetchQualifiedIdJoinChildren) { + JoinSpecProto join_spec; + join_spec.set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec.set_child_property_expression("sender"); + + std::unordered_map> + map_joinable_qualified_id; + DocumentId parent_doc_id = 0; + ScoredDocumentHit child1(/*document_id=*/1, kSectionIdMaskNone, + /*score=*/1.0); + ScoredDocumentHit child2(/*document_id=*/2, kSectionIdMaskNone, + /*score=*/2.0); + map_joinable_qualified_id[parent_doc_id].push_back(child1); + map_joinable_qualified_id[parent_doc_id].push_back(child2); + + JoinChildrenFetcher 
fetcher(join_spec, std::move(map_joinable_qualified_id)); + ICING_ASSERT_OK_AND_ASSIGN(std::vector children, + fetcher.GetChildren(parent_doc_id)); + EXPECT_THAT(children, ElementsAre(EqualsScoredDocumentHit(child1), + EqualsScoredDocumentHit(child2))); +} + +TEST(JoinChildrenFetcherTest, FetchJoinEmptyChildren) { + JoinSpecProto join_spec; + join_spec.set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec.set_child_property_expression("sender"); + + DocumentId parent_doc_id = 0; + + JoinChildrenFetcher fetcher(join_spec, /*map_joinable_qualified_id=*/{}); + ICING_ASSERT_OK_AND_ASSIGN(std::vector children, + fetcher.GetChildren(parent_doc_id)); + EXPECT_THAT(children, IsEmpty()); +} + +TEST(JoinChildrenFetcherTest, UnsupportedJoin) { + JoinSpecProto join_spec; + join_spec.set_parent_property_expression("name"); + join_spec.set_child_property_expression("sender"); + JoinChildrenFetcher fetcher(join_spec, /*map_joinable_qualified_id=*/{}); + EXPECT_THAT(fetcher.GetChildren(0), + StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc index 7700397..ab32850 100644 --- a/icing/join/join-processor.cc +++ b/icing/join/join-processor.cc @@ -34,11 +34,17 @@ namespace icing { namespace lib { -libtextclassifier3::StatusOr> -JoinProcessor::Join( +libtextclassifier3::StatusOr +JoinProcessor::GetChildrenFetcher( const JoinSpecProto& join_spec, - std::vector&& parent_scored_document_hits, std::vector&& child_scored_document_hits) { + if (join_spec.parent_property_expression() != kQualifiedIdExpr) { + // TODO(b/256022027): So far we only support kQualifiedIdExpr for + // parent_property_expression, we could support more. 
+ return absl_ports::UnimplementedError(absl_ports::StrCat( + "Parent property expression must be ", kQualifiedIdExpr)); + } + std::sort( child_scored_document_hits.begin(), child_scored_document_hits.end(), ScoredDocumentHitComparator( @@ -59,7 +65,7 @@ JoinProcessor::Join( // ScoredDocumentHits refer to. The values in this map are vectors of child // ScoredDocumentHits that refer to a parent DocumentId. std::unordered_map> - parent_id_to_child_map; + map_joinable_qualified_id; for (const ScoredDocumentHit& child : child_scored_document_hits) { std::string property_content = FetchPropertyExpressionValue( child.document_id(), join_spec.child_property_expression()); @@ -84,14 +90,21 @@ JoinProcessor::Join( DocumentId parent_doc_id = std::move(parent_doc_id_or).ValueOrDie(); // Since we've already sorted child_scored_document_hits, just simply omit - // if the parent_id_to_child_map[parent_doc_id].size() has reached max + // if the map_joinable_qualified_id[parent_doc_id].size() has reached max // joined child count. - if (parent_id_to_child_map[parent_doc_id].size() < + if (map_joinable_qualified_id[parent_doc_id].size() < join_spec.max_joined_child_count()) { - parent_id_to_child_map[parent_doc_id].push_back(child); + map_joinable_qualified_id[parent_doc_id].push_back(child); } } + return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id)); +} +libtextclassifier3::StatusOr> +JoinProcessor::Join( + const JoinSpecProto& join_spec, + std::vector&& parent_scored_document_hits, + const JoinChildrenFetcher& join_children_fetcher) { std::unique_ptr aggregation_scorer = AggregationScorer::Create(join_spec); @@ -100,23 +113,11 @@ JoinProcessor::Join( // Step 2: iterate through all parent documentIds and construct // JoinedScoredDocumentHit for each by looking up - // parent_id_to_child_map. + // join_children_fetcher. 
for (ScoredDocumentHit& parent : parent_scored_document_hits) { - DocumentId parent_doc_id = kInvalidDocumentId; - if (join_spec.parent_property_expression() == kQualifiedIdExpr) { - parent_doc_id = parent.document_id(); - } else { - // TODO(b/256022027): So far we only support kQualifiedIdExpr for - // parent_property_expression, we could support more. - return absl_ports::UnimplementedError(absl_ports::StrCat( - "Parent property expression must be ", kQualifiedIdExpr)); - } - - std::vector children; - if (auto iter = parent_id_to_child_map.find(parent_doc_id); - iter != parent_id_to_child_map.end()) { - children = std::move(iter->second); - } + ICING_ASSIGN_OR_RETURN( + std::vector children, + join_children_fetcher.GetChildren(parent.document_id())); double final_score = aggregation_scorer->GetScore(parent, children); joined_scored_document_hits.emplace_back(final_score, std::move(parent), diff --git a/icing/join/join-processor.h b/icing/join/join-processor.h index 65c9e5f..9d5ee11 100644 --- a/icing/join/join-processor.h +++ b/icing/join/join-processor.h @@ -20,6 +20,7 @@ #include #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/join/join-children-fetcher.h" #include "icing/proto/search.pb.h" #include "icing/scoring/scored-document-hit.h" #include "icing/store/document-store.h" @@ -34,10 +35,21 @@ class JoinProcessor { explicit JoinProcessor(const DocumentStore* doc_store) : doc_store_(doc_store) {} + // Get a JoinChildrenFetcher used to fetch all children documents by a parent + // document id. + // + // Returns: + // A JoinChildrenFetcher instance on success. + // UNIMPLEMENTED_ERROR if the join type specified by join_spec is not + // supported. 
+ libtextclassifier3::StatusOr GetChildrenFetcher( + const JoinSpecProto& join_spec, + std::vector&& child_scored_document_hits); + libtextclassifier3::StatusOr> Join( const JoinSpecProto& join_spec, std::vector&& parent_scored_document_hits, - std::vector&& child_scored_document_hits); + const JoinChildrenFetcher& join_children_fetcher); private: // Loads a document and uses a property expression to fetch the value of the diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc index 70eaf3f..00f2b1c 100644 --- a/icing/join/join-processor_test.cc +++ b/icing/join/join-processor_test.cc @@ -88,6 +88,20 @@ class JoinProcessorTest : public ::testing::Test { filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } + libtextclassifier3::StatusOr> Join( + const JoinSpecProto& join_spec, + std::vector&& parent_scored_document_hits, + std::vector&& child_scored_document_hits) { + JoinProcessor join_processor(doc_store_.get()); + ICING_ASSIGN_OR_RETURN( + JoinChildrenFetcher join_children_fetcher, + join_processor.GetChildrenFetcher( + join_spec, std::move(child_scored_document_hits))); + return join_processor.Join(join_spec, + std::move(parent_scored_document_hits), + join_children_fetcher); + } + Filesystem filesystem_; std::string test_dir_; std::unique_ptr schema_store_; @@ -165,11 +179,10 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) { join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); EXPECT_THAT( joined_result_document_hits, ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit( @@ -232,11 +245,10 @@ TEST_F(JoinProcessorTest, 
ShouldIgnoreChildDocumentsWithoutJoiningProperty) { join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); // Since Email2 doesn't have "sender" property, it should be ignored. EXPECT_THAT( joined_result_document_hits, @@ -310,11 +322,10 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) { join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); // Email 2 and email 3 (document id 3 and 4) contain invalid qualified ids. // Join processor should ignore them. EXPECT_THAT(joined_result_document_hits, @@ -373,11 +384,10 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) { join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); // Person1 has no child documents, but left join should also include it. 
EXPECT_THAT( joined_result_document_hits, @@ -452,11 +462,10 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) { join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); // Child documents should be sorted according to the (nested) ranking // strategy. EXPECT_THAT( @@ -548,11 +557,10 @@ TEST_F(JoinProcessorTest, join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); // Since we set max_joind_child_count as 2 and use DESC as the (nested) // ranking strategy, parent document with # of child documents more than 2 // should only keep 2 child documents with higher scores and the rest should @@ -601,11 +609,10 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) { join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( ScoringSpecProto::Order::DESC); - JoinProcessor join_processor(doc_store_.get()); ICING_ASSERT_OK_AND_ASSIGN( std::vector joined_result_document_hits, - join_processor.Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); + Join(join_spec, std::move(parent_scored_document_hits), + std::move(child_scored_document_hits))); EXPECT_THAT(joined_result_document_hits, 
ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit( /*final_score=*/1.0, diff --git a/icing/portable/equals-proto.h b/icing/portable/equals-proto.h index 6a600be..8bb835e 100644 --- a/icing/portable/equals-proto.h +++ b/icing/portable/equals-proto.h @@ -20,8 +20,8 @@ #ifndef ICING_PORTABLE_EQUALS_PROTO_H_ #define ICING_PORTABLE_EQUALS_PROTO_H_ +#include "gmock/gmock.h" // IWYU pragma: export #include // IWYU pragma: export -#include "gmock/gmock.h" // IWYU pragma: export #if defined(__ANDROID__) || defined(__APPLE__) namespace icing { diff --git a/icing/portable/gzip_stream.h b/icing/portable/gzip_stream.h index 602093f..8008a55 100644 --- a/icing/portable/gzip_stream.h +++ b/icing/portable/gzip_stream.h @@ -27,8 +27,8 @@ #ifndef GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_ #define GOOGLE3_ICING_PORTABLE_GZIP_STREAM_H_ -#include #include "icing/portable/zlib.h" +#include namespace icing { namespace lib { @@ -50,9 +50,8 @@ class GzipInputStream : public google::protobuf::io::ZeroCopyInputStream { }; // buffer_size and format may be -1 for default of 64kB and GZIP format - explicit GzipInputStream( - google::protobuf::io::ZeroCopyInputStream* sub_stream, - Format format = AUTO, int buffer_size = -1); + explicit GzipInputStream(google::protobuf::io::ZeroCopyInputStream* sub_stream, + Format format = AUTO, int buffer_size = -1); virtual ~GzipInputStream(); // Return last error message or NULL if no error. @@ -113,13 +112,11 @@ class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream { }; // Create a GzipOutputStream with default options. - explicit GzipOutputStream( - google::protobuf::io::ZeroCopyOutputStream* sub_stream); + explicit GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream); // Create a GzipOutputStream with the given options. 
- GzipOutputStream( - google::protobuf::io::ZeroCopyOutputStream* sub_stream, - const Options& options); + GzipOutputStream(google::protobuf::io::ZeroCopyOutputStream* sub_stream, + const Options& options); virtual ~GzipOutputStream(); @@ -164,9 +161,8 @@ class GzipOutputStream : public google::protobuf::io::ZeroCopyOutputStream { size_t input_buffer_length_; // Shared constructor code. - void Init( - google::protobuf::io::ZeroCopyOutputStream* sub_stream, - const Options& options); + void Init(google::protobuf::io::ZeroCopyOutputStream* sub_stream, + const Options& options); // Do some compression. // Takes zlib flush mode. diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc index 924b41f..659ad7b 100644 --- a/icing/query/advanced_query_parser/query-visitor.cc +++ b/icing/query/advanced_query_parser/query-visitor.cc @@ -105,11 +105,44 @@ libtextclassifier3::StatusOr GetInt64Range( } // namespace -libtextclassifier3::StatusOr QueryVisitor::RetrieveIntValue() { - if (pending_values_.empty() || !pending_values_.top().holds_text()) { +libtextclassifier3::StatusOr> +QueryVisitor::CreateTermIterator(const std::string& term) { + if (!processing_not_) { + // 1. Add term to property_query_terms_map + auto property_restrict_or = GetPropertyRestrict(); + if (property_restrict_or.ok()) { + std::string property_restrict = + std::move(property_restrict_or).ValueOrDie(); + property_query_terms_map_[std::move(property_restrict)].insert(term); + } else { + ICING_LOG(DBG) << "Unsatisfiable property restrict, " + << property_restrict_or.status().error_message(); + } + + // 2. If needed add term iterator to query_term_iterators_ map. 
+ if (needs_term_frequency_info_) { + ICING_ASSIGN_OR_RETURN( + std::unique_ptr term_iterator, + index_.GetIterator(term, kSectionIdMaskAll, match_type_, + needs_term_frequency_info_)); + query_term_iterators_[term] = std::make_unique( + std::move(term_iterator), &document_store_, &schema_store_, + filter_options_); + } + } + + // 3. Add the term iterator. + // TODO(b/208654892): Add support for the prefix operator (*). + return index_.GetIterator(term, kSectionIdMaskAll, match_type_, + needs_term_frequency_info_); +} + +libtextclassifier3::StatusOr QueryVisitor::PopPendingIntValue() { + if (pending_values_.empty() || + pending_values_.top().data_type() != PendingValue::DataType::kText) { return absl_ports::InvalidArgumentError("Unable to retrieve int value."); } - std::string& value = pending_values_.top().text; + const std::string& value = pending_values_.top().term(); char* value_end; int64_t int_value = std::strtoll(value.c_str(), &value_end, /*base=*/10); if (value_end != value.c_str() + value.length()) { @@ -120,54 +153,71 @@ libtextclassifier3::StatusOr QueryVisitor::RetrieveIntValue() { return int_value; } -libtextclassifier3::StatusOr QueryVisitor::RetrieveStringValue() { - if (pending_values_.empty() || !pending_values_.top().holds_text()) { - return absl_ports::InvalidArgumentError("Unable to retrieve string value."); +libtextclassifier3::StatusOr +QueryVisitor::PopPendingStringValue() { + if (pending_values_.empty() || + pending_values_.top().data_type() != PendingValue::DataType::kString) { + return absl_ports::InvalidArgumentError("Unable to retrieve string value."); } - std::string string_value = std::move(pending_values_.top().text); + std::string string_value = std::move(pending_values_.top().term()); pending_values_.pop(); return string_value; } +libtextclassifier3::StatusOr QueryVisitor::PopPendingTextValue() { + if (pending_values_.empty() || + pending_values_.top().data_type() != PendingValue::DataType::kText) { + return
absl_ports::InvalidArgumentError("Unable to retrieve text value."); + } + std::string text_value = std::move(pending_values_.top().term()); + pending_values_.pop(); + return text_value; +} + libtextclassifier3::StatusOr> -QueryVisitor::RetrieveIterator() { - if (pending_values_.top().holds_iterator()) { +QueryVisitor::PopPendingIterator() { + if (pending_values_.empty() || pending_values_.top().is_placeholder()) { + return absl_ports::InvalidArgumentError("Unable to retrieve iterator."); + } + if (pending_values_.top().data_type() == + PendingValue::DataType::kDocIterator) { std::unique_ptr iterator = - std::move(pending_values_.top().iterator); + std::move(pending_values_.top().iterator()); pending_values_.pop(); return iterator; - } - ICING_ASSIGN_OR_RETURN(std::string value, RetrieveStringValue()); - if (!processing_not_ && needs_term_frequency_info_) { - ICING_ASSIGN_OR_RETURN( - std::unique_ptr term_iterator, - index_.GetIterator(value, kSectionIdMaskAll, match_type_, - needs_term_frequency_info_)); - query_term_iterators_[value] = std::make_unique( - std::move(term_iterator), &document_store_, &schema_store_, - filter_options_); - } - if (!processing_not_) { - auto property_restrict_or = GetPropertyRestrict(); - if (property_restrict_or.ok()) { - property_query_terms_map_[std::move(property_restrict_or).ValueOrDie()] - .insert(value); - } else { - ICING_LOG(DBG) << "Unsatisfiable property restrict, " - << property_restrict_or.status().error_message(); + } else if (pending_values_.top().data_type() == + PendingValue::DataType::kString) { + features_.insert(kVerbatimSearchFeature); + ICING_ASSIGN_OR_RETURN(std::string value, PopPendingStringValue()); + return CreateTermIterator(std::move(value)); + } else { + ICING_ASSIGN_OR_RETURN(std::string value, PopPendingTextValue()); + ICING_ASSIGN_OR_RETURN(std::unique_ptr token_itr, + tokenizer_.Tokenize(value)); + std::string normalized_term; + std::vector> iterators; + while (token_itr->Advance()) { + for (const 
Token& token : token_itr->GetTokens()) { + normalized_term = normalizer_.NormalizeTerm(token.text); + ICING_ASSIGN_OR_RETURN(std::unique_ptr iterator, + CreateTermIterator(std::move(normalized_term))); + iterators.push_back(std::move(iterator)); + } } + + // Finally, create an And Iterator. If there's only a single term here, then + // it will just return that term iterator. Otherwise, segmented text is + // treated as a group of terms AND'd together. + return CreateAndIterator(std::move(iterators)); } - // Make it into a term iterator. - return index_.GetIterator(value, kSectionIdMaskAll, match_type_, - needs_term_frequency_info_); } libtextclassifier3::StatusOr>> -QueryVisitor::RetrieveIterators() { +QueryVisitor::PopAllPendingIterators() { std::vector> iterators; while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) { ICING_ASSIGN_OR_RETURN(std::unique_ptr itr, - RetrieveIterator()); + PopPendingIterator()); iterators.push_back(std::move(itr)); } if (pending_values_.empty()) { @@ -185,8 +235,8 @@ QueryVisitor::ProcessNumericComparator(const NaryOperatorNode* node) { // 1. The children should have been processed and added their outputs to // pending_values_. Time to process them. // The first two pending values should be the int value and the property. - ICING_ASSIGN_OR_RETURN(int64_t int_value, RetrieveIntValue()); - ICING_ASSIGN_OR_RETURN(std::string property, RetrieveStringValue()); + ICING_ASSIGN_OR_RETURN(int64_t int_value, PopPendingIntValue()); + ICING_ASSIGN_OR_RETURN(std::string property, PopPendingTextValue()); // 2. Create the iterator. 
ICING_ASSIGN_OR_RETURN(Int64Range range, @@ -207,7 +257,7 @@ libtextclassifier3::StatusOr QueryVisitor::ProcessAndOperator(const NaryOperatorNode* node) { ICING_ASSIGN_OR_RETURN( std::vector> iterators, - RetrieveIterators()); + PopAllPendingIterators()); return PendingValue(CreateAndIterator(std::move(iterators))); } @@ -215,23 +265,18 @@ libtextclassifier3::StatusOr QueryVisitor::ProcessOrOperator(const NaryOperatorNode* node) { ICING_ASSIGN_OR_RETURN( std::vector> iterators, - RetrieveIterators()); + PopAllPendingIterators()); return PendingValue(CreateOrIterator(std::move(iterators))); } libtextclassifier3::StatusOr QueryVisitor::ProcessHasOperator(const NaryOperatorNode* node) { - // 1. The children should have been processed and added their outputs to + // The children should have been processed and added their outputs to // pending_values_. Time to process them. // The first two pending values should be the delegate and the property. ICING_ASSIGN_OR_RETURN(std::unique_ptr delegate, - RetrieveIterator()); - // TODO(b/208654892): The HAS operator need to be able to differentiate - // between values that came from STRING nodes and those that came from - // members. members should be allowed as the left operator to HAS, but STRINGs - // should not be. IOW, `"prop1":foo` should not be treated equivalently to - // `prop1:foo` - ICING_ASSIGN_OR_RETURN(std::string property, RetrieveStringValue()); + PopPendingIterator()); + ICING_ASSIGN_OR_RETURN(std::string property, PopPendingTextValue()); return PendingValue(std::make_unique( std::move(delegate), &document_store_, &schema_store_, std::move(property))); @@ -259,21 +304,22 @@ void QueryVisitor::VisitFunctionName(const FunctionNameNode* node) { } void QueryVisitor::VisitString(const StringNode* node) { + // A STRING node can only be a term. The iterator is created when popped.
auto escaped_string_or = EscapeStringValue(node->value()); if (!escaped_string_or.ok()) { pending_error_ = std::move(escaped_string_or).status(); return; } - features_.insert(kVerbatimSearchFeature); - std::string escaped_string = std::move(escaped_string_or).ValueOrDie(); - pending_values_.push(PendingValue(std::move(escaped_string))); + pending_values_.push(PendingValue::CreateStringPendingValue( + std::move(escaped_string_or).ValueOrDie())); } void QueryVisitor::VisitText(const TextNode* node) { - // TODO(b/208654892): Add support for 1. segmentation and 2. the prefix - // prefix operator (*). - std::string normalized_text = normalizer_.NormalizeTerm(node->value()); - pending_values_.push(PendingValue(std::move(normalized_text))); + // TEXT nodes could either be a term (and will become DocHitInfoIteratorTerm) + // or a property name. As such, we just push the TEXT value into pending + // values and determine which it is at a later point. + pending_values_.push( + PendingValue::CreateTextPendingValue(std::move(node->value()))); } void QueryVisitor::VisitMember(const MemberNode* node) { @@ -290,11 +336,11 @@ void QueryVisitor::VisitMember(const MemberNode* node) { // 3. The children should have been processed and added their outputs to // pending_values_. Time to process them. 
- std::string member = std::move(pending_values_.top().text); + std::string member = std::move(pending_values_.top().term()); pending_values_.pop(); while (!pending_values_.empty() && !pending_values_.top().is_placeholder()) { - member = absl_ports::StrCat(pending_values_.top().text, kPropertySeparator, - member); + member = absl_ports::StrCat(pending_values_.top().term(), + kPropertySeparator, member); pending_values_.pop(); } @@ -307,7 +353,7 @@ void QueryVisitor::VisitMember(const MemberNode* node) { } pending_values_.pop(); - pending_values_.push(PendingValue(std::move(member))); + pending_values_.push(PendingValue::CreateTextPendingValue(std::move(member))); } void QueryVisitor::VisitFunction(const FunctionNode* node) { @@ -347,7 +393,7 @@ void QueryVisitor::VisitUnaryOperator(const UnaryOperatorNode* node) { } // 3. Retrieve the delegate iterator - auto iterator_or = RetrieveIterator(); + auto iterator_or = PopPendingIterator(); if (!iterator_or.ok()) { pending_error_ = std::move(iterator_or).status(); return; @@ -389,12 +435,12 @@ void QueryVisitor::VisitNaryOperator(const NaryOperatorNode* node) { return; } if (processing_has && !processing_not_ && i == 0) { - if (!pending_values_.top().holds_text()) { + if (pending_values_.top().data_type() != PendingValue::DataType::kText) { pending_error_ = absl_ports::InvalidArgumentError( "Expected property before ':' operator."); return; } - pending_property_restricts_.push_back(pending_values_.top().text); + pending_property_restricts_.push_back(pending_values_.top().term()); } } @@ -437,7 +483,7 @@ libtextclassifier3::StatusOr QueryVisitor::ConsumeResults() && { return absl_ports::InvalidArgumentError( "Visitor does not contain a single root iterator."); } - auto iterator_or = RetrieveIterator(); + auto iterator_or = PopPendingIterator(); if (!iterator_or.ok()) { return std::move(iterator_or).status(); } diff --git a/icing/query/advanced_query_parser/query-visitor.h 
b/icing/query/advanced_query_parser/query-visitor.h index 9e68572..414f1b9 100644 --- a/icing/query/advanced_query_parser/query-visitor.h +++ b/icing/query/advanced_query_parser/query-visitor.h @@ -21,7 +21,6 @@ #include #include -#include "icing/absl_ports/canonical_errors.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator-filter.h" #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -31,6 +30,7 @@ #include "icing/query/query-results.h" #include "icing/schema/schema-store.h" #include "icing/store/document-store.h" +#include "icing/tokenization/tokenizer.h" #include "icing/transform/normalizer.h" namespace icing { @@ -40,19 +40,18 @@ namespace lib { // the parser. class QueryVisitor : public AbstractSyntaxTreeVisitor { public: - explicit QueryVisitor(Index* index, - const NumericIndex* numeric_index, - const DocumentStore* document_store, - const SchemaStore* schema_store, - const Normalizer* normalizer, - DocHitInfoIteratorFilter::Options filter_options, - TermMatchType::Code match_type, - bool needs_term_frequency_info) + explicit QueryVisitor( + Index* index, const NumericIndex* numeric_index, + const DocumentStore* document_store, const SchemaStore* schema_store, + const Normalizer* normalizer, const Tokenizer* tokenizer, + DocHitInfoIteratorFilter::Options filter_options, + TermMatchType::Code match_type, bool needs_term_frequency_info) : index_(*index), numeric_index_(*numeric_index), document_store_(*document_store), schema_store_(*schema_store), normalizer_(*normalizer), + tokenizer_(*tokenizer), filter_options_(std::move(filter_options)), match_type_(match_type), needs_term_frequency_info_(needs_term_frequency_info), @@ -74,41 +73,85 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor { private: // A holder for intermediate results when processing child nodes. 
- struct PendingValue { - PendingValue() = default; + class PendingValue { + public: + enum class DataType { + kNone, + // Values of type STRING will eventually be converted to a + // DocHitInfoIterator further upstream. + kString, + + // Values of type TEXT may be consumed as properties, numbers or converted + // to DocHitInfoIterators further upstream. + kText, + kDocIterator, + }; + + static PendingValue CreateStringPendingValue(std::string str) { + return PendingValue(std::move(str), DataType::kString); + } + + static PendingValue CreateTextPendingValue(std::string text) { + return PendingValue(std::move(text), DataType::kText); + } + + PendingValue() : data_type_(DataType::kNone) {} explicit PendingValue(std::unique_ptr iterator) - : iterator(std::move(iterator)) {} - - explicit PendingValue(std::string text) : text(std::move(text)) {} + : iterator_(std::move(iterator)), data_type_(DataType::kDocIterator) {} // Placeholder is used to indicate where the children of a particular node // begin. - bool is_placeholder() const { return iterator == nullptr && text.empty(); } + bool is_placeholder() const { return data_type_ == DataType::kNone; } + + DataType data_type() const { return data_type_; } + + std::unique_ptr& iterator() { return iterator_; } + const std::unique_ptr& iterator() const { + return iterator_; + } - bool holds_text() const { return iterator == nullptr && !text.empty(); } + std::string& term() { return term_; } + const std::string& term() const { return term_; } - bool holds_iterator() const { return iterator != nullptr && text.empty(); } + private: + explicit PendingValue(std::string term, DataType data_type) + : term_(std::move(term)), data_type_(data_type) {} - std::unique_ptr iterator; - std::string text; + std::unique_ptr iterator_; + std::string term_; + DataType data_type_; }; bool has_pending_error() const { return !pending_error_.ok(); } + // Creates a DocHitInfoIterator reflecting the provided term. 
Also populates + // property_query_terms_map_ and query_term_iterators_ as appropriate. + // Returns: + // - On success, a DocHitInfoIterator for the provided term + // - INVALID_ARGUMENT if unable to create an iterator for the term. + libtextclassifier3::StatusOr> + CreateTermIterator(const std::string& term); + // Processes the PendingValue at the top of pending_values_, parses it into a // int64_t and pops the top. // Returns: // - On success, the int value stored in the text at the top // - INVALID_ARGUMENT if pending_values_ is empty, doesn't hold a text or // can't be parsed as an int. - libtextclassifier3::StatusOr RetrieveIntValue(); + libtextclassifier3::StatusOr PopPendingIntValue(); + + // Processes the PendingValue at the top of pending_values_ and pops the top. + // Returns: + // - On success, the string value stored in the text at the top + // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a string. + libtextclassifier3::StatusOr PopPendingStringValue(); // Processes the PendingValue at the top of pending_values_ and pops the top. // Returns: // - On success, the string value stored in the text at the top // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a text. - libtextclassifier3::StatusOr RetrieveStringValue(); + libtextclassifier3::StatusOr PopPendingTextValue(); // Processes the PendingValue at the top of pending_values_ and pops the top. // Returns: @@ -116,7 +159,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor { // - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an // iterator for the term. libtextclassifier3::StatusOr> - RetrieveIterator(); + PopPendingIterator(); // Processes all PendingValues at the top of pending_values_ until the first // placeholder is encounter.
@@ -126,7 +169,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor { // - INVALID_ARGUMENT if pending_values_is empty or if unable to create an // iterator for any of the terms at the top of pending_values_ libtextclassifier3::StatusOr>> - RetrieveIterators(); + PopAllPendingIterators(); // Processes the NumericComparator represented by node. This must be called // *after* this node's children have been visited. The PendingValues added by @@ -193,6 +236,7 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor { const DocumentStore& document_store_; // Does not own! const SchemaStore& schema_store_; // Does not own! const Normalizer& normalizer_; // Does not own! + const Tokenizer& tokenizer_; // Does not own! DocHitInfoIteratorFilter::Options filter_options_; TermMatchType::Code match_type_; diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc index f15bd7f..1577a3f 100644 --- a/icing/query/advanced_query_parser/query-visitor_test.cc +++ b/icing/query/advanced_query_parser/query-visitor_test.cc @@ -27,6 +27,7 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/dummy-numeric-index.h" #include "icing/index/numeric/numeric-index.h" +#include "icing/jni/jni-cache.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/portable/platform.h" #include "icing/query/advanced_query_parser/abstract-syntax-tree.h" @@ -36,10 +37,16 @@ #include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" #include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/tokenization/language-segmenter.h" +#include "icing/tokenization/tokenizer-factory.h" +#include "icing/tokenization/tokenizer.h" #include "icing/transform/normalizer-factory.h" #include 
"icing/transform/normalizer.h" +#include "unicode/uloc.h" namespace icing { namespace lib { @@ -73,6 +80,7 @@ class QueryVisitorTest : public ::testing::Test { void SetUp() override { test_dir_ = GetTestTempDir() + "/icing"; index_dir_ = test_dir_ + "/index"; + numeric_index_dir_ = test_dir_ + "/numeric_index"; store_dir_ = test_dir_ + "/store"; schema_store_dir_ = test_dir_ + "/schema_store"; filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); @@ -80,6 +88,8 @@ class QueryVisitorTest : public ::testing::Test { filesystem_.CreateDirectoryRecursively(store_dir_.c_str()); filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + jni_cache_ = GetTestJniCache(); + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { // If we've specified using the reverse-JNI method for segmentation (i.e. // not ICU), then we won't have the ICU data file included to set up. @@ -107,10 +117,23 @@ class QueryVisitorTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( index_, Index::Create(options, &filesystem_, &icing_filesystem_)); - numeric_index_ = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + numeric_index_, + DummyNumericIndex::Create(filesystem_, numeric_index_dir_)); ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( /*max_term_byte_size=*/1000)); + + language_segmenter_factory::SegmenterOptions segmenter_options( + ULOC_US, jni_cache_.get()); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(segmenter_options)); + + ICING_ASSERT_OK_AND_ASSIGN(tokenizer_, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::PLAIN, + language_segmenter_.get())); } libtextclassifier3::StatusOr> ParseQueryHelper( @@ -126,6 +149,7 @@ class QueryVisitorTest : public ::testing::Test { IcingFilesystem icing_filesystem_; std::string test_dir_; std::string index_dir_; + std::string numeric_index_dir_; std::string schema_store_dir_; std::string store_dir_; Clock clock_; @@ 
-134,6 +158,9 @@ class QueryVisitorTest : public ::testing::Test { std::unique_ptr index_; std::unique_ptr> numeric_index_; std::unique_ptr normalizer_; + std::unique_ptr language_segmenter_; + std::unique_ptr tokenizer_; + std::unique_ptr jni_cache_; }; TEST_F(QueryVisitorTest, SimpleLessThan) { @@ -157,7 +184,7 @@ TEST_F(QueryVisitorTest, SimpleLessThan) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -194,7 +221,7 @@ TEST_F(QueryVisitorTest, SimpleLessThanEq) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -231,7 +258,7 @@ TEST_F(QueryVisitorTest, SimpleEqual) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -268,7 +295,7 @@ TEST_F(QueryVisitorTest, SimpleGreaterThanEq) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -305,7 +332,7 @@ TEST_F(QueryVisitorTest, SimpleGreaterThan) { 
ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -344,7 +371,7 @@ TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanEqual) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -382,7 +409,7 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanEqual) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -421,7 +448,7 @@ TEST_F(QueryVisitorTest, NestedPropertyLessThan) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -443,7 +470,7 @@ TEST_F(QueryVisitorTest, IntParsingError) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); 
root_node->Accept(&query_visitor); @@ -457,7 +484,7 @@ TEST_F(QueryVisitorTest, NotEqualsUnsupported) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -502,7 +529,7 @@ TEST_F(QueryVisitorTest, LessThanTooManyOperandsInvalid) { auto root_node = std::make_unique("<", std::move(args)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -526,7 +553,7 @@ TEST_F(QueryVisitorTest, LessThanTooFewOperandsInvalid) { auto root_node = std::make_unique("<", std::move(args)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -559,7 +586,7 @@ TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -570,7 +597,7 @@ TEST_F(QueryVisitorTest, LessThanNonExistentPropertyNotFound) { TEST_F(QueryVisitorTest, NeverVisitedReturnsInvalid) { QueryVisitor query_visitor( index_.get(), numeric_index_.get(), 
document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); EXPECT_THAT(std::move(query_visitor).ConsumeResults(), @@ -600,7 +627,7 @@ TEST_F(QueryVisitorTest, DISABLED_IntMinLessThanInvalid) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -630,7 +657,7 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -638,41 +665,19 @@ TEST_F(QueryVisitorTest, IntMaxGreaterThanInvalid) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(QueryVisitorTest, SingleTerm) { - // Setup the index with docs 0, 1 and 2 holding the values "foo", "foo" and - // "bar" respectively. 
- Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1, - TERM_MATCH_PREFIX, /*namespace_id=*/0); - editor.BufferTerm("foo"); - editor.IndexAllBufferedTerms(); - - editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX, - /*namespace_id=*/0); - editor.BufferTerm("foo"); - editor.IndexAllBufferedTerms(); - - editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX, - /*namespace_id=*/0); - editor.BufferTerm("bar"); - editor.IndexAllBufferedTerms(); - - std::string query = "foo"; +TEST_F(QueryVisitorTest, NumericComparisonPropertyStringIsInvalid) { + // "price" is a STRING token, which cannot be a property name. + std::string query = R"("price" > 7)"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr root_node, ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); - ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, - std::move(query_visitor).ConsumeResults()); - EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), - UnorderedElementsAre("foo")); - EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre("")); - EXPECT_THAT(query_results.query_terms[""], UnorderedElementsAre("foo")); - EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), - ElementsAre(kDocumentId1, kDocumentId0)); + EXPECT_THAT(std::move(query_visitor).ConsumeResults(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) { @@ -698,7 +703,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyEnabled) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), 
normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -750,7 +755,7 @@ TEST_F(QueryVisitorTest, SingleTermTermFrequencyDisabled) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/false); root_node->Accept(&query_visitor); @@ -801,7 +806,7 @@ TEST_F(QueryVisitorTest, SingleVerbatimTerm) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -854,7 +859,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingQuote) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -900,7 +905,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingEscape) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -948,7 +953,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), 
numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -969,7 +974,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingNonSpecialChar) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1018,7 +1023,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1038,7 +1043,7 @@ TEST_F(QueryVisitorTest, VerbatimTermNewLine) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1081,7 +1086,7 @@ TEST_F(QueryVisitorTest, VerbatimTermEscapingComplex) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, 
/*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1132,7 +1137,7 @@ TEST_F(QueryVisitorTest, SingleMinusTerm) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1179,7 +1184,7 @@ TEST_F(QueryVisitorTest, SingleNotTerm) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1231,7 +1236,7 @@ TEST_F(QueryVisitorTest, NestedNotTerms) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1297,7 +1302,7 @@ TEST_F(QueryVisitorTest, DeeplyNestedNotTerms) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1335,7 +1340,7 @@ TEST_F(QueryVisitorTest, ImplicitAndTerms) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), 
DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1373,7 +1378,7 @@ TEST_F(QueryVisitorTest, ExplicitAndTerms) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1411,7 +1416,7 @@ TEST_F(QueryVisitorTest, OrTerms) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1451,7 +1456,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1471,7 +1476,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1490,7 +1495,7 @@ TEST_F(QueryVisitorTest, AndOrTermPrecedence) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_three( index_.get(), 
numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_three); @@ -1545,7 +1550,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1564,7 +1569,7 @@ TEST_F(QueryVisitorTest, AndOrNotPrecedence) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1625,7 +1630,7 @@ TEST_F(QueryVisitorTest, PropertyFilter) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1641,6 +1646,99 @@ TEST_F(QueryVisitorTest, PropertyFilter) { ElementsAre(kDocumentId1, kDocumentId0)); } +TEST_F(QueryVisitorTest, PropertyFilterStringIsInvalid) { + ICING_ASSERT_OK(schema_store_->SetSchema( + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("type") + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + 
.SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("prop2") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build())); + + // "prop1" is a STRING token, which cannot be a property name. + std::string query = R"("prop1":foo)"; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + EXPECT_THAT(std::move(query_visitor).ConsumeResults(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(QueryVisitorTest, PropertyFilterNonNormalized) { + ICING_ASSERT_OK(schema_store_->SetSchema( + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("type") + .AddProperty(PropertyConfigBuilder() + .SetName("PROP1") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("PROP2") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build())); + // Section ids are assigned alphabetically. 
+ SectionId prop1_section_id = 0; + SectionId prop2_section_id = 1; + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); + Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id, + TERM_MATCH_PREFIX, /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build())); + editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build())); + editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + std::string query = "PROP1:foo"; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, + std::move(query_visitor).ConsumeResults()); + EXPECT_THAT(ExtractKeys(query_results.query_terms), + UnorderedElementsAre("PROP1")); + EXPECT_THAT(query_results.query_terms["PROP1"], UnorderedElementsAre("foo")); + EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), + UnorderedElementsAre("foo")); + EXPECT_THAT(query_results.features_in_use, IsEmpty()); + EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), + ElementsAre(kDocumentId1, kDocumentId0)); +} + TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) { ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() @@ -1687,7 +1785,7 @@ 
TEST_F(QueryVisitorTest, PropertyFilterWithGrouping) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1750,7 +1848,7 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1769,7 +1867,7 @@ TEST_F(QueryVisitorTest, ValidNestedPropertyFilter) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1831,7 +1929,7 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1847,7 +1945,7 @@ TEST_F(QueryVisitorTest, InvalidNestedPropertyFilter) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + 
schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1906,7 +2004,7 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) { ParseQueryHelper(query)); QueryVisitor query_visitor( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor); @@ -1922,7 +2020,7 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) { ICING_ASSERT_OK_AND_ASSIGN(root_node, ParseQueryHelper(query)); QueryVisitor query_visitor_two( index_.get(), numeric_index_.get(), document_store_.get(), - schema_store_.get(), normalizer_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, /*needs_term_frequency_info_=*/true); root_node->Accept(&query_visitor_two); @@ -1935,6 +2033,91 @@ TEST_F(QueryVisitorTest, PropertyFilterWithNot) { ElementsAre(kDocumentId2)); } +TEST_F(QueryVisitorTest, SegmentationTest) { + ICING_ASSERT_OK(schema_store_->SetSchema( + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("type") + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("prop2") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build())); + // Section ids are assigned alphabetically. + SectionId prop1_section_id = 0; + SectionId prop2_section_id = 1; + + // ICU segmentation will break this into "每天" and "上班". 
+ // CFStringTokenizer (ios) will break this into "每", "天" and "上班" + std::string query = "每天上班"; + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); + Index::Editor editor = index_->Edit(kDocumentId0, prop1_section_id, + TERM_MATCH_PREFIX, /*namespace_id=*/0); + editor.BufferTerm("上班"); + editor.IndexAllBufferedTerms(); + editor = index_->Edit(kDocumentId0, prop2_section_id, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + if (IsCfStringTokenization()) { + editor.BufferTerm("每"); + editor.BufferTerm("天"); + } else { + editor.BufferTerm("每天"); + } + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build())); + editor = index_->Edit(kDocumentId1, prop1_section_id, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("上班"); + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build())); + editor = index_->Edit(kDocumentId2, prop2_section_id, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + if (IsCfStringTokenization()) { + editor.BufferTerm("每"); + editor.BufferTerm("天"); + } else { + editor.BufferTerm("每天"); + } + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, + std::move(query_visitor).ConsumeResults()); + EXPECT_THAT(query_results.features_in_use, IsEmpty()); + EXPECT_THAT(ExtractKeys(query_results.query_terms), UnorderedElementsAre("")); + if (IsCfStringTokenization()) { + EXPECT_THAT(query_results.query_terms[""], + 
UnorderedElementsAre("每", "天", "上班")); + EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), + UnorderedElementsAre("每", "天", "上班")); + } else { + EXPECT_THAT(query_results.query_terms[""], + UnorderedElementsAre("每天", "上班")); + EXPECT_THAT(ExtractKeys(query_results.query_term_iterators), + UnorderedElementsAre("每天", "上班")); + } + EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), + ElementsAre(kDocumentId0)); +} + } // namespace } // namespace lib diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc index 17b2acb..47f109c 100644 --- a/icing/query/query-processor.cc +++ b/icing/query/query-processor.cc @@ -151,18 +151,8 @@ libtextclassifier3::StatusOr QueryProcessor::ParseSearch( if (search_spec.search_type() == SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { ICING_VLOG(1) << "Using EXPERIMENTAL_ICING_ADVANCED_QUERY parser!"; - libtextclassifier3::StatusOr results_or = - ParseAdvancedQuery(search_spec, ranking_strategy); - if (results_or.ok()) { - results = std::move(results_or).ValueOrDie(); - } else { - ICING_VLOG(1) - << "Unable to parse query using advanced query parser. Error: " - << results_or.status().error_message() - << ". 
Falling back to old query parser."; - ICING_ASSIGN_OR_RETURN(results, - ParseRawQuery(search_spec, ranking_strategy)); - } + ICING_ASSIGN_OR_RETURN(results, + ParseAdvancedQuery(search_spec, ranking_strategy)); } else { ICING_ASSIGN_OR_RETURN(results, ParseRawQuery(search_spec, ranking_strategy)); @@ -204,13 +194,17 @@ libtextclassifier3::StatusOr QueryProcessor::ParseAdvancedQuery( document_store_.last_added_document_id()); return results; } + ICING_ASSIGN_OR_RETURN( + std::unique_ptr plain_tokenizer, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_)); DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec); bool needs_term_frequency_info = ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE; - QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_, - &schema_store_, &normalizer_, std::move(options), - search_spec.term_match_type(), - needs_term_frequency_info); + QueryVisitor query_visitor( + &index_, &numeric_index_, &document_store_, &schema_store_, &normalizer_, + plain_tokenizer.get(), std::move(options), search_spec.term_match_type(), + needs_term_frequency_info); tree_root->Accept(&query_visitor); return std::move(query_visitor).ConsumeResults(); } diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index 6d776ce..111b598 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -57,8 +57,8 @@ // $ adb push blaze-bin/icing/query/query-processor_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/query-processor_benchmark --benchmark_filter=all -// --adb +// $ adb shell /data/local/tmp/query-processor_benchmark +// --benchmark_filter=all --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, // the benchmark will set up data files accordingly. 
@@ -103,6 +103,7 @@ void BM_QueryOneTerm(benchmark::State& state) { Filesystem filesystem; const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark"; const std::string index_dir = base_dir + "/index"; + const std::string numeric_index_dir = base_dir + "/numeric_index"; const std::string schema_dir = base_dir + "/schema"; const std::string doc_store_dir = base_dir + "/store"; @@ -116,7 +117,9 @@ void BM_QueryOneTerm(benchmark::State& state) { std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); // TODO(b/249829533): switch to use persistent numeric index. - auto numeric_index = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + auto numeric_index, + DummyNumericIndex::Create(filesystem, numeric_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = @@ -226,6 +229,7 @@ void BM_QueryFiveTerms(benchmark::State& state) { Filesystem filesystem; const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark"; const std::string index_dir = base_dir + "/index"; + const std::string numeric_index_dir = base_dir + "/numeric_index"; const std::string schema_dir = base_dir + "/schema"; const std::string doc_store_dir = base_dir + "/store"; @@ -239,7 +243,9 @@ void BM_QueryFiveTerms(benchmark::State& state) { std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); // TODO(b/249829533): switch to use persistent numeric index. 
- auto numeric_index = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + auto numeric_index, + DummyNumericIndex::Create(filesystem, numeric_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = @@ -367,6 +373,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) { Filesystem filesystem; const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark"; const std::string index_dir = base_dir + "/index"; + const std::string numeric_index_dir = base_dir + "/numeric_index"; const std::string schema_dir = base_dir + "/schema"; const std::string doc_store_dir = base_dir + "/store"; @@ -380,7 +387,9 @@ void BM_QueryDiacriticTerm(benchmark::State& state) { std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); // TODO(b/249829533): switch to use persistent numeric index. - auto numeric_index = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + auto numeric_index, + DummyNumericIndex::Create(filesystem, numeric_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = @@ -493,6 +502,7 @@ void BM_QueryHiragana(benchmark::State& state) { Filesystem filesystem; const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark"; const std::string index_dir = base_dir + "/index"; + const std::string numeric_index_dir = base_dir + "/numeric_index"; const std::string schema_dir = base_dir + "/schema"; const std::string doc_store_dir = base_dir + "/store"; @@ -506,7 +516,9 @@ void BM_QueryHiragana(benchmark::State& state) { std::unique_ptr index = CreateIndex(icing_filesystem, filesystem, index_dir); // TODO(b/249829533): switch to use persistent numeric index. 
- auto numeric_index = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + auto numeric_index, + DummyNumericIndex::Create(filesystem, numeric_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr language_segmenter = diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index b807b14..7842a9a 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -70,7 +70,8 @@ class QueryProcessorTest : test_dir_(GetTestTempDir() + "/icing"), store_dir_(test_dir_ + "/store"), schema_store_dir_(test_dir_ + "/schema_store"), - index_dir_(test_dir_ + "/index") {} + index_dir_(test_dir_ + "/index"), + numeric_index_dir_(test_dir_ + "/numeric_index") {} void SetUp() override { filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); @@ -104,7 +105,9 @@ class QueryProcessorTest ICING_ASSERT_OK_AND_ASSIGN( index_, Index::Create(options, &filesystem_, &icing_filesystem_)); // TODO(b/249829533): switch to use persistent numeric index. 
- numeric_index_ = std::make_unique>(); + ICING_ASSERT_OK_AND_ASSIGN( + numeric_index_, + DummyNumericIndex::Create(filesystem_, numeric_index_dir_)); language_segmenter_factory::SegmenterOptions segmenter_options( ULOC_US, jni_cache_.get()); @@ -154,6 +157,7 @@ class QueryProcessorTest private: IcingFilesystem icing_filesystem_; const std::string index_dir_; + const std::string numeric_index_dir_; protected: std::unique_ptr index_; @@ -223,17 +227,25 @@ TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) { SearchSpecProto search_spec; search_spec.set_query("()"); search_spec.set_search_type(GetParam()); + if (GetParam() != + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + ICING_ASSERT_OK_AND_ASSIGN( + QueryResults results, + query_processor_->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); - ICING_ASSERT_OK_AND_ASSIGN( - QueryResults results, - query_processor_->ParseSearch(search_spec, - ScoringSpecProto::RankingStrategy::NONE)); - - // Descending order of valid DocumentIds - EXPECT_THAT(GetDocumentIds(results.root_iterator.get()), - ElementsAre(document_id2, document_id1)); - EXPECT_THAT(results.query_terms, IsEmpty()); - EXPECT_THAT(results.query_term_iterators, IsEmpty()); + // Descending order of valid DocumentIds + EXPECT_THAT(GetDocumentIds(results.root_iterator.get()), + ElementsAre(document_id2, document_id1)); + EXPECT_THAT(results.query_terms, IsEmpty()); + EXPECT_THAT(results.query_term_iterators, IsEmpty()); + } else { + // TODO(b/208654892): Resolve the difference between RAW_QUERY and ADVANCED + // regarding empty composite expressions. 
+ EXPECT_THAT(query_processor_->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::NONE), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + } } TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) { diff --git a/icing/scoring/advanced_scoring/advanced-scorer.cc b/icing/scoring/advanced_scoring/advanced-scorer.cc index 212a476..771615c 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer.cc +++ b/icing/scoring/advanced_scoring/advanced-scorer.cc @@ -30,7 +30,8 @@ libtextclassifier3::StatusOr> AdvancedScorer::Create(const ScoringSpecProto& scoring_spec, double default_score, const DocumentStore* document_store, - const SchemaStore* schema_store) { + const SchemaStore* schema_store, + const JoinChildrenFetcher* join_children_fetcher) { ICING_RETURN_ERROR_IF_NULL(document_store); ICING_RETURN_ERROR_IF_NULL(schema_store); @@ -48,15 +49,14 @@ AdvancedScorer::Create(const ScoringSpecProto& scoring_spec, std::make_unique(document_store, std::move(section_weights)); ScoringVisitor visitor(default_score, document_store, schema_store, - bm25f_calculator.get()); + bm25f_calculator.get(), join_children_fetcher); tree_root->Accept(&visitor); ICING_ASSIGN_OR_RETURN(std::unique_ptr expression, std::move(visitor).Expression()); - if (expression->is_document_type()) { + if (expression->type() != ScoreExpressionType::kDouble) { return absl_ports::InvalidArgumentError( - "The root scoring expression will always be evaluated to a document, " - "but a number is expected."); + "The root scoring expression is not of double type."); } return std::unique_ptr(new AdvancedScorer( std::move(expression), std::move(bm25f_calculator), default_score)); diff --git a/icing/scoring/advanced_scoring/advanced-scorer.h b/icing/scoring/advanced_scoring/advanced-scorer.h index 763499b..1a1cd5c 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer.h +++ b/icing/scoring/advanced_scoring/advanced-scorer.h @@ -20,6 +20,7 @@ #include 
"icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/join/join-children-fetcher.h" #include "icing/schema/schema-store.h" #include "icing/scoring/advanced_scoring/score-expression.h" #include "icing/scoring/bm25f-calculator.h" @@ -37,7 +38,8 @@ class AdvancedScorer : public Scorer { // INVALID_ARGUMENT if fails to create an instance static libtextclassifier3::StatusOr> Create( const ScoringSpecProto& scoring_spec, double default_score, - const DocumentStore* document_store, const SchemaStore* schema_store); + const DocumentStore* document_store, const SchemaStore* schema_store, + const JoinChildrenFetcher* join_children_fetcher = nullptr); double GetScore(const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override { diff --git a/icing/scoring/advanced_scoring/score-expression.cc b/icing/scoring/advanced_scoring/score-expression.cc index a8749df..6393645 100644 --- a/icing/scoring/advanced_scoring/score-expression.cc +++ b/icing/scoring/advanced_scoring/score-expression.cc @@ -17,6 +17,18 @@ namespace icing { namespace lib { +namespace { + +libtextclassifier3::Status CheckChildrenNotNull( + const std::vector>& children) { + for (const auto& child : children) { + ICING_RETURN_ERROR_IF_NULL(child); + } + return libtextclassifier3::Status::OK; +} + +} // namespace + libtextclassifier3::StatusOr> OperatorScoreExpression::Create( OperatorType op, std::vector> children) { @@ -24,12 +36,13 @@ OperatorScoreExpression::Create( return absl_ports::InvalidArgumentError( "OperatorScoreExpression must have at least one argument."); } + ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children)); + bool children_all_constant_double = true; for (const auto& child : children) { - ICING_RETURN_ERROR_IF_NULL(child); - if (child->is_document_type()) { + if (child->type() != ScoreExpressionType::kDouble) { return absl_ports::InvalidArgumentError( - "Operators are not supported for document type."); + 
"Operators are only supported for double type."); } if (!child->is_constant_double()) { children_all_constant_double = false; @@ -54,7 +67,7 @@ OperatorScoreExpression::Create( } libtextclassifier3::StatusOr OperatorScoreExpression::eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) { + const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const { // The Create factory guarantees that an operator will have at least one // child. ICING_ASSIGN_OR_RETURN(double res, children_.at(0)->eval(hit_info, query_it)); @@ -97,6 +110,10 @@ const std::unordered_map {"sin", FunctionType::kSin}, {"cos", FunctionType::kCos}, {"tan", FunctionType::kTan}}; +const std::unordered_set + MathFunctionScoreExpression::kVariableArgumentsFunctions = { + FunctionType::kMax, FunctionType::kMin}; + libtextclassifier3::StatusOr> MathFunctionScoreExpression::Create( FunctionType function_type, @@ -105,12 +122,26 @@ MathFunctionScoreExpression::Create( return absl_ports::InvalidArgumentError( "Math functions must have at least one argument."); } + ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children)); + + // Received a list type in the function argument. + if (children.size() == 1 && + children[0]->type() == ScoreExpressionType::kDoubleList) { + // Only certain functions support list type. + if (kVariableArgumentsFunctions.count(function_type) > 0) { + return std::unique_ptr( + new MathFunctionScoreExpression(function_type, std::move(children))); + } + return absl_ports::InvalidArgumentError(absl_ports::StrCat( + "Received an unsupported list type argument in the math function.")); + } + bool children_all_constant_double = true; for (const auto& child : children) { - ICING_RETURN_ERROR_IF_NULL(child); - if (child->is_document_type()) { + if (child->type() != ScoreExpressionType::kDouble) { return absl_ports::InvalidArgumentError( - "Math functions are not supported for document type."); + "Got an invalid type for the math function. 
Should expect a double " + "type argument."); } if (!child->is_constant_double()) { children_all_constant_double = false; @@ -172,11 +203,16 @@ MathFunctionScoreExpression::Create( } libtextclassifier3::StatusOr MathFunctionScoreExpression::eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) { + const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const { std::vector values; - for (const auto& child : children_) { - ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it)); - values.push_back(v); + if (children_.at(0)->type() == ScoreExpressionType::kDoubleList) { + ICING_ASSIGN_OR_RETURN(values, + children_.at(0)->eval_list(hit_info, query_it)); + } else { + for (const auto& child : children_) { + ICING_ASSIGN_OR_RETURN(double v, child->eval(hit_info, query_it)); + values.push_back(v); + } } double res = 0; @@ -194,9 +230,17 @@ libtextclassifier3::StatusOr MathFunctionScoreExpression::eval( res = pow(values[0], values[1]); break; case FunctionType::kMax: + if (values.empty()) { + return absl_ports::InvalidArgumentError( + "Got an empty parameter set in max function"); + } res = *std::max_element(values.begin(), values.end()); break; case FunctionType::kMin: + if (values.empty()) { + return absl_ports::InvalidArgumentError( + "Got an empty parameter set in min function"); + } res = *std::min_element(values.begin(), values.end()); break; case FunctionType::kSqrt: @@ -240,10 +284,9 @@ DocumentFunctionScoreExpression::Create( return absl_ports::InvalidArgumentError( "Document-based functions must have at least one argument."); } - for (const auto& child : children) { - ICING_RETURN_ERROR_IF_NULL(child); - } - if (!children[0]->is_document_type()) { + ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children)); + + if (children[0]->type() != ScoreExpressionType::kDocument) { return absl_ports::InvalidArgumentError( "The first parameter of document-based functions must be \"this\"."); } @@ -259,7 +302,8 @@ 
DocumentFunctionScoreExpression::Create( case FunctionType::kUsageCount: [[fallthrough]]; case FunctionType::kUsageLastUsedTimestamp: - if (children.size() != 2 || children[1]->is_document_type()) { + if (children.size() != 2 || + children[1]->type() != ScoreExpressionType::kDouble) { return absl_ports::InvalidArgumentError( "UsageCount/UsageLastUsedTimestamp must have 2 arguments. The " "first argument should be \"this\", and the second argument " @@ -273,7 +317,7 @@ DocumentFunctionScoreExpression::Create( } libtextclassifier3::StatusOr DocumentFunctionScoreExpression::eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) { + const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const { switch (function_type_) { case FunctionType::kDocumentScore: [[fallthrough]]; @@ -330,8 +374,9 @@ RelevanceScoreFunctionScoreExpression::Create( return absl_ports::InvalidArgumentError( "relevanceScore must have 1 argument."); } - ICING_RETURN_ERROR_IF_NULL(children[0]); - if (!children[0]->is_document_type()) { + ICING_RETURN_IF_ERROR(CheckChildrenNotNull(children)); + + if (children[0]->type() != ScoreExpressionType::kDocument) { return absl_ports::InvalidArgumentError( "relevanceScore must take \"this\" as its argument."); } @@ -342,7 +387,7 @@ RelevanceScoreFunctionScoreExpression::Create( libtextclassifier3::StatusOr RelevanceScoreFunctionScoreExpression::eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) { + const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const { if (query_it == nullptr) { return default_score_; } diff --git a/icing/scoring/advanced_scoring/score-expression.h b/icing/scoring/advanced_scoring/score-expression.h index f80da33..047a175 100644 --- a/icing/scoring/advanced_scoring/score-expression.h +++ b/icing/scoring/advanced_scoring/score-expression.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "icing/text_classifier/lib3/utils/base/statusor.h" @@ -31,6 +32,12 @@ 
namespace icing { namespace lib { +enum class ScoreExpressionType { + kDouble, + kDoubleList, + kDocument // Only "this" is considered as document type. +}; + class ScoreExpression { public: virtual ~ScoreExpression() = default; @@ -43,10 +50,32 @@ class ScoreExpression { // expression. // - INTERNAL if there are inconsistencies. virtual libtextclassifier3::StatusOr eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) = 0; + const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const { + if (type() == ScoreExpressionType::kDouble) { + return absl_ports::UnimplementedError( + "All ScoreExpressions of type Double must provide their own " + "implementation of eval!"); + } + return absl_ports::InternalError( + "Runtime type error: the expression should never be evaluated to a " + "double. There must be inconsistencies in the static type checking."); + } - // Indicate whether the current expression is of document type - virtual bool is_document_type() const { return false; } + virtual libtextclassifier3::StatusOr> eval_list( + const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) const { + if (type() == ScoreExpressionType::kDoubleList) { + return absl_ports::UnimplementedError( + "All ScoreExpressions of type Double List must provide their own " + "implementation of eval_list!"); + } + return absl_ports::InternalError( + "Runtime type error: the expression should never be evaluated to a " + "double list. There must be inconsistencies in the static type " + "checking."); + } + + // Indicate the type to which the current expression will be evaluated. + virtual ScoreExpressionType type() const = 0; // Indicate whether the current expression is a constant double. // Returns true if and only if the object is of ConstantScoreExpression type. 
@@ -59,15 +88,10 @@ class ThisExpression : public ScoreExpression { return std::unique_ptr(new ThisExpression()); } - libtextclassifier3::StatusOr eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override { - return absl_ports::InternalError( - "Should never reach here to evaluate a document type as double. " - "There must be inconsistencies."); + ScoreExpressionType type() const override { + return ScoreExpressionType::kDocument; } - bool is_document_type() const override { return true; } - private: ThisExpression() = default; }; @@ -81,10 +105,14 @@ class ConstantScoreExpression : public ScoreExpression { } libtextclassifier3::StatusOr eval( - const DocHitInfo&, const DocHitInfoIterator*) override { + const DocHitInfo&, const DocHitInfoIterator*) const override { return c_; } + ScoreExpressionType type() const override { + return ScoreExpressionType::kDouble; + } + bool is_constant_double() const override { return true; } private: @@ -107,7 +135,12 @@ class OperatorScoreExpression : public ScoreExpression { OperatorType op, std::vector> children); libtextclassifier3::StatusOr eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override; + const DocHitInfo& hit_info, + const DocHitInfoIterator* query_it) const override; + + ScoreExpressionType type() const override { + return ScoreExpressionType::kDouble; + } private: explicit OperatorScoreExpression( @@ -134,6 +167,8 @@ class MathFunctionScoreExpression : public ScoreExpression { static const std::unordered_map kFunctionNames; + static const std::unordered_set kVariableArgumentsFunctions; + // RETURNS: // - A MathFunctionScoreExpression instance on success if not simplifiable. // - A ConstantScoreExpression instance on success if simplifiable. 
@@ -144,7 +179,12 @@ class MathFunctionScoreExpression : public ScoreExpression { std::vector> children); libtextclassifier3::StatusOr eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override; + const DocHitInfo& hit_info, + const DocHitInfoIterator* query_it) const override; + + ScoreExpressionType type() const override { + return ScoreExpressionType::kDouble; + } private: explicit MathFunctionScoreExpression( @@ -178,7 +218,12 @@ class DocumentFunctionScoreExpression : public ScoreExpression { const DocumentStore* document_store, double default_score); libtextclassifier3::StatusOr eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override; + const DocHitInfo& hit_info, + const DocHitInfoIterator* query_it) const override; + + ScoreExpressionType type() const override { + return ScoreExpressionType::kDouble; + } private: explicit DocumentFunctionScoreExpression( @@ -210,7 +255,12 @@ class RelevanceScoreFunctionScoreExpression : public ScoreExpression { Bm25fCalculator* bm25f_calculator, double default_score); libtextclassifier3::StatusOr eval( - const DocHitInfo& hit_info, const DocHitInfoIterator* query_it) override; + const DocHitInfo& hit_info, + const DocHitInfoIterator* query_it) const override; + + ScoreExpressionType type() const override { + return ScoreExpressionType::kDouble; + } private: explicit RelevanceScoreFunctionScoreExpression( diff --git a/icing/scoring/advanced_scoring/score-expression_test.cc b/icing/scoring/advanced_scoring/score-expression_test.cc index b49b658..07c9d76 100644 --- a/icing/scoring/advanced_scoring/score-expression_test.cc +++ b/icing/scoring/advanced_scoring/score-expression_test.cc @@ -39,13 +39,39 @@ class NonConstantScoreExpression : public ScoreExpression { } libtextclassifier3::StatusOr eval( - const DocHitInfo&, const DocHitInfoIterator*) override { + const DocHitInfo &, const DocHitInfoIterator *) const override { return 0; } + ScoreExpressionType type() const override { + 
return ScoreExpressionType::kDouble; + } + bool is_constant_double() const override { return false; } }; +class ListScoreExpression : public ScoreExpression { + public: + static std::unique_ptr Create( + const std::vector &values) { + std::unique_ptr res = + std::make_unique(); + res->values = values; + return res; + } + + libtextclassifier3::StatusOr> eval_list( + const DocHitInfo &, const DocHitInfoIterator *) const override { + return values; + } + + ScoreExpressionType type() const override { + return ScoreExpressionType::kDoubleList; + } + + std::vector values; +}; + template std::vector> MakeChildren(Args... args) { std::vector> children; @@ -180,6 +206,82 @@ TEST(ScoreExpressionTest, CannotSimplifyNonConstant) { ASSERT_FALSE(expression->is_constant_double()); } +TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgument) { + // max({1, 2, 3}) = 3 + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr expression, + MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMax, + MakeChildren(ListScoreExpression::Create({1, 2, 3})))); + EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(3))); + + // min({1, 2, 3}) = 1 + ICING_ASSERT_OK_AND_ASSIGN( + expression, MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMin, + MakeChildren(ListScoreExpression::Create({1, 2, 3})))); + EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(1))); + + // max({4}) = 4 + ICING_ASSERT_OK_AND_ASSIGN( + expression, MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMax, + MakeChildren(ListScoreExpression::Create({4})))); + EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), IsOkAndHolds(Eq(4))); + + // min({5}) = 5 + ICING_ASSERT_OK_AND_ASSIGN( + expression, MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMin, + MakeChildren(ListScoreExpression::Create({5})))); + EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), 
IsOkAndHolds(Eq(5))); +} + +TEST(ScoreExpressionTest, MathFunctionsWithListTypeArgumentError) { + // max({}) = evaluation error, since max on empty list does not produce a + // valid result. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr expression, + MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMax, + MakeChildren(ListScoreExpression::Create({})))); + EXPECT_THAT(expression->eval(DocHitInfo(), nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + // max(1, {2}) = type error, since max must take either n > 0 parameters of + // type double, or a single parameter of type list. + EXPECT_THAT(MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMax, + MakeChildren(ConstantScoreExpression::Create(1), + ListScoreExpression::Create({2}))), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + // min({1}, {2}) = type error, since min must take either n > 0 parameters of + // type double, or a single parameter of type list. + EXPECT_THAT(MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kMin, + MakeChildren(ListScoreExpression::Create({1}), + ListScoreExpression::Create({2}))), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + + // sin({1}) = type error, since sin does not support list type parameters. 
+ EXPECT_THAT(MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kSin, + MakeChildren(ListScoreExpression::Create({1}))), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST(ScoreExpressionTest, ChildrenCannotBeNull) { + EXPECT_THAT(OperatorScoreExpression::Create( + OperatorScoreExpression::OperatorType::kPlus, + MakeChildren(ConstantScoreExpression::Create(1), nullptr)), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(MathFunctionScoreExpression::Create( + MathFunctionScoreExpression::FunctionType::kPow, + MakeChildren(ConstantScoreExpression::Create(2), nullptr)), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); +} + } // namespace } // namespace lib diff --git a/icing/scoring/advanced_scoring/scoring-visitor.h b/icing/scoring/advanced_scoring/scoring-visitor.h index 539af2d..9b01f73 100644 --- a/icing/scoring/advanced_scoring/scoring-visitor.h +++ b/icing/scoring/advanced_scoring/scoring-visitor.h @@ -17,6 +17,7 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/join/join-children-fetcher.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/proto/scoring.pb.h" #include "icing/query/advanced_query_parser/abstract-syntax-tree.h" @@ -32,11 +33,13 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor { explicit ScoringVisitor(double default_score, const DocumentStore* document_store, const SchemaStore* schema_store, - Bm25fCalculator* bm25f_calculator) + Bm25fCalculator* bm25f_calculator, + const JoinChildrenFetcher* join_children_fetcher) : default_score_(default_score), document_store_(*document_store), schema_store_(*schema_store), - bm25f_calculator_(*bm25f_calculator) {} + bm25f_calculator_(*bm25f_calculator), + join_children_fetcher_(join_children_fetcher) {} void VisitFunctionName(const FunctionNameNode* node) override; void VisitString(const 
StringNode* node) override; @@ -86,6 +89,8 @@ class ScoringVisitor : public AbstractSyntaxTreeVisitor { const DocumentStore& document_store_; const SchemaStore& schema_store_; Bm25fCalculator& bm25f_calculator_; + // A non-null join_children_fetcher_ indicates scoring in a join. + const JoinChildrenFetcher* join_children_fetcher_; // Does not own. libtextclassifier3::Status pending_error_; std::vector> stack; diff --git a/icing/scoring/scorer-factory.cc b/icing/scoring/scorer-factory.cc index f75b564..c647642 100644 --- a/icing/scoring/scorer-factory.cc +++ b/icing/scoring/scorer-factory.cc @@ -163,7 +163,8 @@ namespace scorer_factory { libtextclassifier3::StatusOr> Create( const ScoringSpecProto& scoring_spec, double default_score, - const DocumentStore* document_store, const SchemaStore* schema_store) { + const DocumentStore* document_store, const SchemaStore* schema_store, + const JoinChildrenFetcher* join_children_fetcher) { ICING_RETURN_ERROR_IF_NULL(document_store); ICING_RETURN_ERROR_IF_NULL(schema_store); @@ -211,7 +212,7 @@ libtextclassifier3::StatusOr> Create( "Advanced scoring is enabled, but the expression is empty!"); } return AdvancedScorer::Create(scoring_spec, default_score, document_store, - schema_store); + schema_store, join_children_fetcher); case ScoringSpecProto::RankingStrategy::JOIN_AGGREGATE_SCORE: // Use join aggregate score to rank. 
Since the aggregation score is // calculated by child documents after joining (in JoinProcessor), we can diff --git a/icing/scoring/scorer-factory.h b/icing/scoring/scorer-factory.h index 8c19c75..460e5bb 100644 --- a/icing/scoring/scorer-factory.h +++ b/icing/scoring/scorer-factory.h @@ -16,6 +16,7 @@ #define ICING_SCORING_SCORER_FACTORY_H_ #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/join/join-children-fetcher.h" #include "icing/scoring/scorer.h" #include "icing/store/document-store.h" @@ -36,7 +37,8 @@ namespace scorer_factory { // INVALID_ARGUMENT if fails to create an instance libtextclassifier3::StatusOr> Create( const ScoringSpecProto& scoring_spec, double default_score, - const DocumentStore* document_store, const SchemaStore* schema_store); + const DocumentStore* document_store, const SchemaStore* schema_store, + const JoinChildrenFetcher* join_children_fetcher = nullptr); } // namespace scorer_factory diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc index 571a112..894852d 100644 --- a/icing/scoring/scoring-processor.cc +++ b/icing/scoring/scoring-processor.cc @@ -43,7 +43,8 @@ constexpr double kDefaultScoreInAscendingOrder = libtextclassifier3::StatusOr> ScoringProcessor::Create(const ScoringSpecProto& scoring_spec, const DocumentStore* document_store, - const SchemaStore* schema_store) { + const SchemaStore* schema_store, + const JoinChildrenFetcher* join_children_fetcher) { ICING_RETURN_ERROR_IF_NULL(document_store); ICING_RETURN_ERROR_IF_NULL(schema_store); @@ -52,11 +53,11 @@ ScoringProcessor::Create(const ScoringSpecProto& scoring_spec, ICING_ASSIGN_OR_RETURN( std::unique_ptr scorer, - scorer_factory::Create(scoring_spec, - is_descending_order - ? kDefaultScoreInDescendingOrder - : kDefaultScoreInAscendingOrder, - document_store, schema_store)); + scorer_factory::Create( + scoring_spec, + is_descending_order ? 
kDefaultScoreInDescendingOrder + : kDefaultScoreInAscendingOrder, + document_store, schema_store, join_children_fetcher)); // Using `new` to access a non-public constructor. return std::unique_ptr( new ScoringProcessor(std::move(scorer))); diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h index e7d09b1..9cd4d85 100644 --- a/icing/scoring/scoring-processor.h +++ b/icing/scoring/scoring-processor.h @@ -21,6 +21,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/join/join-children-fetcher.h" #include "icing/proto/scoring.pb.h" #include "icing/scoring/scored-document-hit.h" #include "icing/scoring/scorer.h" @@ -41,7 +42,8 @@ class ScoringProcessor { // FAILED_PRECONDITION on any null pointer input static libtextclassifier3::StatusOr> Create( const ScoringSpecProto& scoring_spec, const DocumentStore* document_store, - const SchemaStore* schema_store); + const SchemaStore* schema_store, + const JoinChildrenFetcher* join_children_fetcher = nullptr); // Assigns scores to DocHitInfos from the given DocHitInfoIterator and returns // a vector of ScoredDocumentHits. 
The size of results is no more than diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 9e79790..2a7e108 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -1873,8 +1873,7 @@ libtextclassifier3::Status DocumentStore::SetUsageScores( libtextclassifier3::StatusOr< google::protobuf::RepeatedPtrField> DocumentStore::CollectCorpusInfo() const { - google::protobuf::RepeatedPtrField - corpus_info; + google::protobuf::RepeatedPtrField corpus_info; libtextclassifier3::StatusOr schema_proto_or = schema_store_->GetSchema(); if (!schema_proto_or.ok()) { @@ -1919,10 +1918,10 @@ DocumentStore::GetDebugInfo(int verbosity) const { ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); debug_info.set_crc(crc.Get()); if (verbosity > 0) { - ICING_ASSIGN_OR_RETURN(google::protobuf::RepeatedPtrField< - DocumentDebugInfoProto::CorpusInfo> - corpus_info, - CollectCorpusInfo()); + ICING_ASSIGN_OR_RETURN( + google::protobuf::RepeatedPtrField + corpus_info, + CollectCorpusInfo()); *debug_info.mutable_corpus_info() = std::move(corpus_info); } return debug_info; diff --git a/icing/store/document-store.h b/icing/store/document-store.h index bda351d..3e02636 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -730,8 +730,8 @@ class DocumentStore { // Returns: // - on success, a RepeatedPtrField for CorpusInfo collected. // - OUT_OF_RANGE, this should never happen. - libtextclassifier3::StatusOr> + libtextclassifier3::StatusOr< + google::protobuf::RepeatedPtrField> CollectCorpusInfo() const; }; diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 7cf951a..a115e11 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -88,7 +88,9 @@ const NamespaceStorageInfoProto& GetNamespaceStorageInfo( // Didn't find our namespace, fail the test. 
EXPECT_TRUE(false) << "Failed to find namespace '" << name_space << "' in DocumentStorageInfoProto."; - return std::move(NamespaceStorageInfoProto()); + static const auto& default_namespace_storage_info = + *new NamespaceStorageInfoProto(); + return default_namespace_storage_info; } UsageReport CreateUsageReport(std::string name_space, std::string uri, diff --git a/icing/store/key-mapper_benchmark.cc b/icing/store/key-mapper_benchmark.cc index b649bc7..1ce54c7 100644 --- a/icing/store/key-mapper_benchmark.cc +++ b/icing/store/key-mapper_benchmark.cc @@ -35,6 +35,7 @@ namespace lib { namespace { using ::testing::Eq; +using ::testing::IsTrue; using ::testing::Not; class KeyMapperBenchmark { @@ -78,8 +79,10 @@ class KeyMapperBenchmark { template <> libtextclassifier3::StatusOr>> CreateKeyMapper>(int max_num_entries) { + std::string working_path = + absl_ports::StrCat(base_dir, "/", "key_mapper_dir"); return PersistentHashMapKeyMapper::Create( - filesystem, base_dir, max_num_entries, + filesystem, std::move(working_path), max_num_entries, /*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int), /*max_load_factor_percent=*/100); } @@ -109,6 +112,7 @@ void BM_PutMany(benchmark::State& state) { state.PauseTiming(); benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str()); DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir); + ASSERT_THAT(ddir.is_valid(), IsTrue()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr> key_mapper, benchmark.CreateKeyMapper(num_keys)); @@ -166,6 +170,7 @@ void BM_Put(benchmark::State& state) { KeyMapperBenchmark benchmark; benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str()); DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir); + ASSERT_THAT(ddir.is_valid(), IsTrue()); // The overhead of state.PauseTiming is too large and affects the benchmark // result a lot, so pre-generate enough kvps to avoid calling too many times @@ -206,6 +211,7 @@ void BM_Get(benchmark::State& state) 
{ KeyMapperBenchmark benchmark; benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str()); DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir); + ASSERT_THAT(ddir.is_valid(), IsTrue()); // Create a key mapper with num_keys entries. ICING_ASSERT_OK_AND_ASSIGN( @@ -260,6 +266,7 @@ void BM_Iterator(benchmark::State& state) { KeyMapperBenchmark benchmark; benchmark.filesystem.DeleteDirectoryRecursively(benchmark.base_dir.c_str()); DestructibleDirectory ddir(&benchmark.filesystem, benchmark.base_dir); + ASSERT_THAT(ddir.is_valid(), IsTrue()); // Create a key mapper with num_keys entries. ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/store/key-mapper_test.cc b/icing/store/key-mapper_test.cc index 682888d..1367c2d 100644 --- a/icing/store/key-mapper_test.cc +++ b/icing/store/key-mapper_test.cc @@ -32,6 +32,7 @@ #include "icing/testing/tmp-directory.h" using ::testing::IsEmpty; +using ::testing::IsTrue; using ::testing::Pair; using ::testing::UnorderedElementsAre; @@ -47,7 +48,13 @@ class KeyMapperTest : public ::testing::Test { protected: using KeyMapperType = T; - void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; } + void SetUp() override { + base_dir_ = GetTestTempDir() + "/icing"; + ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), + IsTrue()); + + working_dir_ = base_dir_ + "/key_mapper"; + } void TearDown() override { filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()); @@ -63,17 +70,18 @@ class KeyMapperTest : public ::testing::Test { libtextclassifier3::StatusOr>> CreateKeyMapper>() { return DynamicTrieKeyMapper::Create( - filesystem_, base_dir_, kMaxDynamicTrieKeyMapperSize); + filesystem_, working_dir_, kMaxDynamicTrieKeyMapperSize); } template <> libtextclassifier3::StatusOr>> CreateKeyMapper>() { return PersistentHashMapKeyMapper::Create(filesystem_, - base_dir_); + working_dir_); } std::string base_dir_; + std::string working_dir_; Filesystem filesystem_; }; @@ -175,15 +183,15 
@@ TYPED_TEST(KeyMapperTest, CanUseAcrossMultipleInstances) { TYPED_TEST(KeyMapperTest, CanDeleteAndRestartKeyMapping) { // Can delete even if there's nothing there - ICING_EXPECT_OK( - TestFixture::KeyMapperType::Delete(this->filesystem_, this->base_dir_)); + ICING_EXPECT_OK(TestFixture::KeyMapperType::Delete(this->filesystem_, + this->working_dir_)); ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr> key_mapper, this->template CreateKeyMapper()); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->PersistToDisk()); - ICING_EXPECT_OK( - TestFixture::KeyMapperType::Delete(this->filesystem_, this->base_dir_)); + ICING_EXPECT_OK(TestFixture::KeyMapperType::Delete(this->filesystem_, + this->working_dir_)); key_mapper.reset(); ICING_ASSERT_OK_AND_ASSIGN(key_mapper, diff --git a/icing/store/persistent-hash-map-key-mapper.h b/icing/store/persistent-hash-map-key-mapper.h index a13ec11..5f83e6f 100644 --- a/icing/store/persistent-hash-map-key-mapper.h +++ b/icing/store/persistent-hash-map-key-mapper.h @@ -43,11 +43,13 @@ class PersistentHashMapKeyMapper : public KeyMapper { // Returns any encountered IO errors. // // filesystem: Object to make system level calls - // base_dir : Base directory used to save all the files required to persist - // PersistentHashMapKeyMapper. If this base_dir was previously used - // to create a PersistentHashMapKeyMapper, then this existing data - // would be loaded. Otherwise, an empty PersistentHashMapKeyMapper - // would be created. + // working_path: Working directory used to save all the files required to + // persist PersistentHashMapKeyMapper. If this working_path was + // previously used to create a PersistentHashMapKeyMapper, then + // this existing data would be loaded. Otherwise, an empty + // PersistentHashMapKeyMapper would be created. See + // PersistentStorage for more details about the concept of + // working_path. // max_num_entries: max # of kvps. It will be used to compute 3 storages size. 
// average_kv_byte_size: average byte size of a single key + serialized value. // It will be used to compute kv_storage size. @@ -60,24 +62,25 @@ class PersistentHashMapKeyMapper : public KeyMapper { // considered valid. static libtextclassifier3::StatusOr< std::unique_ptr>> - Create(const Filesystem& filesystem, std::string_view base_dir, + Create(const Filesystem& filesystem, std::string working_path, int32_t max_num_entries = PersistentHashMap::Entry::kMaxNumEntries, int32_t average_kv_byte_size = PersistentHashMap::Options::kDefaultAverageKVByteSize, int32_t max_load_factor_percent = PersistentHashMap::Options::kDefaultMaxLoadFactorPercent); - // Deletes all the files associated with the PersistentHashMapKeyMapper. + // Deletes working_path (and all the files under it recursively) associated + // with the PersistentHashMapKeyMapper. // - // base_dir : Base directory used to save all the files required to persist - // PersistentHashMapKeyMapper. Should be the same as passed into - // Create(). + // working_path: Working directory used to save all the files required to + // persist PersistentHashMapKeyMapper. Should be the same as + // passed into Create(). // // Returns: // OK on success // INTERNAL_ERROR on I/O error static libtextclassifier3::Status Delete(const Filesystem& filesystem, - std::string_view base_dir); + const std::string& working_path); ~PersistentHashMapKeyMapper() override = default; @@ -122,7 +125,7 @@ class PersistentHashMapKeyMapper : public KeyMapper { } libtextclassifier3::StatusOr ComputeChecksum() override { - return persistent_hash_map_->ComputeChecksum(); + return persistent_hash_map_->UpdateChecksums(); } private: @@ -147,8 +150,6 @@ class PersistentHashMapKeyMapper : public KeyMapper { PersistentHashMap::Iterator itr_; }; - static constexpr std::string_view kKeyMapperDir = "key_mapper_dir"; - // Use PersistentHashMapKeyMapper::Create() to instantiate. 
explicit PersistentHashMapKeyMapper( std::unique_ptr persistent_hash_map) @@ -164,21 +165,13 @@ template /* static */ libtextclassifier3::StatusOr< std::unique_ptr>> PersistentHashMapKeyMapper::Create( - const Filesystem& filesystem, std::string_view base_dir, + const Filesystem& filesystem, std::string working_path, int32_t max_num_entries, int32_t average_kv_byte_size, int32_t max_load_factor_percent) { - const std::string key_mapper_dir = - absl_ports::StrCat(base_dir, "/", kKeyMapperDir); - if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Failed to create PersistentHashMapKeyMapper directory: ", - key_mapper_dir)); - } - ICING_ASSIGN_OR_RETURN( std::unique_ptr persistent_hash_map, PersistentHashMap::Create( - filesystem, key_mapper_dir, + filesystem, std::move(working_path), PersistentHashMap::Options( /*value_type_size_in=*/sizeof(T), /*max_num_entries_in=*/max_num_entries, @@ -191,16 +184,9 @@ PersistentHashMapKeyMapper::Create( template /* static */ libtextclassifier3::Status -PersistentHashMapKeyMapper::Delete(const Filesystem& filesystem, - std::string_view base_dir) { - const std::string key_mapper_dir = - absl_ports::StrCat(base_dir, "/", kKeyMapperDir); - if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Failed to delete PersistentHashMapKeyMapper directory: ", - key_mapper_dir)); - } - return libtextclassifier3::Status::OK; +PersistentHashMapKeyMapper::Delete( + const Filesystem& filesystem, const std::string& working_path) { + return PersistentHashMap::Discard(filesystem, working_path); } } // namespace lib diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc index aceb11d..f32e541 100644 --- a/icing/transform/icu/icu-normalizer.cc +++ b/icing/transform/icu/icu-normalizer.cc @@ -134,17 +134,16 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view 
term) const { ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance"; } - // Checks if the first character is within ASCII range or can be transformed - // into an ASCII char. Since the term is tokenized, we know that the whole - // term can be transformed into ASCII if the first character can. - UChar32 first_uchar32 = - i18n_utils::GetUChar32At(term.data(), term.length(), 0); - if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 && - DiacriticCharToAscii(normalizer2, first_uchar32, nullptr)) { - // This is a faster method to normalize Latin terms. - normalized_text = NormalizeLatin(normalizer2, term); - } else { - normalized_text = term_transformer_->Transform(term); + // Normalize the prefix that can be transformed into ASCII. + // This is a faster method to normalize Latin terms. + NormalizeLatinResult result = NormalizeLatin(normalizer2, term); + normalized_text = std::move(result.text); + if (result.end_pos < term.length()) { + // Some portion of term couldn't be normalized via NormalizeLatin. Use + // term_transformer to handle this portion. 
+ std::string_view rest_term = term.substr(result.end_pos); + absl_ports::StrAppend(&normalized_text, + term_transformer_->Transform(rest_term)); } if (normalized_text.length() > max_term_byte_size_) { @@ -154,40 +153,32 @@ std::string IcuNormalizer::NormalizeTerm(const std::string_view term) const { return normalized_text; } -std::string IcuNormalizer::NormalizeLatin(const UNormalizer2* normalizer2, - const std::string_view term) const { - std::string result; - result.reserve(term.length()); - int current_pos = 0; - while (current_pos < term.length()) { - if (i18n_utils::IsAscii(term[current_pos])) { - result.push_back(std::tolower(term[current_pos])); - ++current_pos; +IcuNormalizer::NormalizeLatinResult IcuNormalizer::NormalizeLatin( + const UNormalizer2* normalizer2, const std::string_view term) const { + NormalizeLatinResult result = {}; + if (normalizer2 == nullptr) { + return result; + } + CharacterIterator char_itr(term); + result.text.reserve(term.length()); + char ascii_char; + while (char_itr.utf8_index() < term.length()) { + UChar32 c = char_itr.GetCurrentChar(); + if (i18n_utils::IsAscii(c)) { + result.text.push_back(std::tolower(c)); + } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) { + result.text.push_back(std::tolower(ascii_char)); } else { - UChar32 uchar32 = - i18n_utils::GetUChar32At(term.data(), term.length(), current_pos); - if (uchar32 == i18n_utils::kInvalidUChar32) { - ICING_LOG(WARNING) << "Unable to get uchar32 from " << term - << " at position" << current_pos; - current_pos += i18n_utils::GetUtf8Length(uchar32); - continue; - } - char ascii_char; - if (DiacriticCharToAscii(normalizer2, uchar32, &ascii_char)) { - result.push_back(std::tolower(ascii_char)); - } else { - // We don't know how to transform / decompose this Unicode character, it - // probably means that some other Unicode characters are mixed with - // Latin characters. This shouldn't happen if input term is properly - // tokenized. 
We handle it here in case there're something wrong with - // the tokenizers. - int utf8_length = i18n_utils::GetUtf8Length(uchar32); - absl_ports::StrAppend(&result, term.substr(current_pos, utf8_length)); - } - current_pos += i18n_utils::GetUtf8Length(uchar32); + // We don't know how to transform / decompose this Unicode character, it + // probably means that some other Unicode characters are mixed with Latin + // characters. We return the partial result here and let the caller handle + // the rest. + result.end_pos = char_itr.utf8_index(); + return result; } + char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1); } - + result.end_pos = term.length(); return result; } @@ -267,10 +258,13 @@ std::string IcuNormalizer::TermTransformer::Transform( return std::move(utf8_term_or).ValueOrDie(); } -CharacterIterator FindNormalizedLatinMatchEndPosition( +bool IcuNormalizer::FindNormalizedLatinMatchEndPosition( const UNormalizer2* normalizer2, std::string_view term, - CharacterIterator char_itr, std::string_view normalized_term) { - CharacterIterator normalized_char_itr(normalized_term); + CharacterIterator& char_itr, std::string_view normalized_term, + CharacterIterator& normalized_char_itr) const { + if (normalizer2 == nullptr) { + return false; + } char ascii_char; while (char_itr.utf8_index() < term.length() && normalized_char_itr.utf8_index() < normalized_term.length()) { @@ -278,16 +272,18 @@ CharacterIterator FindNormalizedLatinMatchEndPosition( if (i18n_utils::IsAscii(c)) { c = std::tolower(c); } else if (DiacriticCharToAscii(normalizer2, c, &ascii_char)) { - c = ascii_char; + c = std::tolower(ascii_char); + } else { + return false; } UChar32 normalized_c = normalized_char_itr.GetCurrentChar(); if (c != normalized_c) { - return char_itr; + return true; } char_itr.AdvanceToUtf32(char_itr.utf32_index() + 1); normalized_char_itr.AdvanceToUtf32(normalized_char_itr.utf32_index() + 1); } - return char_itr; + return true; } CharacterIterator @@ -357,15 +353,18 @@ 
CharacterIterator IcuNormalizer::FindNormalizedMatchEndPosition( } CharacterIterator char_itr(term); - UChar32 first_uchar32 = char_itr.GetCurrentChar(); - if (normalizer2 != nullptr && first_uchar32 != i18n_utils::kInvalidUChar32 && - DiacriticCharToAscii(normalizer2, first_uchar32, /*char_out=*/nullptr)) { - return FindNormalizedLatinMatchEndPosition(normalizer2, term, char_itr, - normalized_term); - } else { - return term_transformer_->FindNormalizedNonLatinMatchEndPosition( - term, char_itr, normalized_term); + CharacterIterator normalized_char_itr(normalized_term); + if (FindNormalizedLatinMatchEndPosition( + normalizer2, term, char_itr, normalized_term, normalized_char_itr)) { + return char_itr; } + // Some portion of term couldn't be normalized via + // FindNormalizedLatinMatchEndPosition. Use term_transformer to handle this + // portion. + std::string_view rest_normalized_term = + normalized_term.substr(normalized_char_itr.utf8_index()); + return term_transformer_->FindNormalizedNonLatinMatchEndPosition( + term, char_itr, rest_normalized_term); } } // namespace lib diff --git a/icing/transform/icu/icu-normalizer.h b/icing/transform/icu/icu-normalizer.h index d4f1ebd..7c64506 100644 --- a/icing/transform/icu/icu-normalizer.h +++ b/icing/transform/icu/icu-normalizer.h @@ -101,14 +101,36 @@ class IcuNormalizer : public Normalizer { UTransliterator* u_transliterator_; }; + struct NormalizeLatinResult { + // A string representing the maximum prefix of term (can be empty or term + // itself) that can be normalized into ASCII. + std::string text; + // The first position of the char within term that normalization failed to + // transform into an ASCII char, or term.length() if all chars can be + // transformed. + size_t end_pos; + }; + explicit IcuNormalizer(std::unique_ptr term_transformer, int max_term_byte_size); // Helper method to normalize Latin terms only. Rules applied: // 1. Uppercase to lowercase // 2. 
Remove diacritic (accent) marks - std::string NormalizeLatin(const UNormalizer2* normalizer2, - std::string_view term) const; + NormalizeLatinResult NormalizeLatin(const UNormalizer2* normalizer2, + std::string_view term) const; + + // Set char_itr and normalized_char_itr to point to one past the end of the + // segments of term and normalized_term that can match if normalized into + // ASCII. In this case, true will be returned. + // + // The method stops at the position when char_itr cannot be normalized into + // ASCII and returns false, so that term_transformer can handle the remaining + // portion. + bool FindNormalizedLatinMatchEndPosition( + const UNormalizer2* normalizer2, std::string_view term, + CharacterIterator& char_itr, std::string_view normalized_term, + CharacterIterator& normalized_char_itr) const; // Used to transform terms into their normalized forms. std::unique_ptr term_transformer_; diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc index 143da17..719f7be 100644 --- a/icing/transform/icu/icu-normalizer_test.cc +++ b/icing/transform/icu/icu-normalizer_test.cc @@ -111,6 +111,7 @@ TEST_F(IcuNormalizerTest, LatinLetterRemoveAccent) { EXPECT_THAT(normalizer_->NormalizeTerm("ÝŶŸẎẏŷýÿ"), Eq("yyyyyyyy")); EXPECT_THAT(normalizer_->NormalizeTerm("ŹŻŽẐẒẔẑẓẕźżž"), Eq("zzzzzzzzzzzz")); + EXPECT_THAT(normalizer_->NormalizeTerm("Barış"), Eq("baris")); } // Accent / diacritic marks won't be removed in non-latin chars, e.g. 
in @@ -278,6 +279,14 @@ TEST_F(IcuNormalizerTest, PrefixMatchLength) { term = "Buenos días"; match_end = normalizer->FindNormalizedMatchEndPosition(term, "buenos di"); EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Buenos dí")); + + term = "BarışIcing"; + match_end = normalizer->FindNormalizedMatchEndPosition(term, "baris"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış")); + + term = "ÀĄḁáIcing"; + match_end = normalizer->FindNormalizedMatchEndPosition(term, "aaaa"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("ÀĄḁá")); } TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) { @@ -327,6 +336,10 @@ TEST_F(IcuNormalizerTest, SharedPrefixMatchLength) { term = "días"; match_end = normalizer->FindNormalizedMatchEndPosition(term, "diamond"); EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("día")); + + term = "BarışIcing"; + match_end = normalizer->FindNormalizedMatchEndPosition(term, "barismdi"); + EXPECT_THAT(term.substr(0, match_end.utf8_index()), Eq("Barış")); } } // namespace -- cgit v1.2.3