diff options
author | Jiayu Hu <hujiayu@google.com> | 2022-04-26 13:05:36 -0700 |
---|---|---|
committer | Jiayu Hu <hujiayu@google.com> | 2022-04-26 21:22:27 +0000 |
commit | f00787419db907033571b24a611de2ce3cca38c8 (patch) | |
tree | a0080daa25aaa70b7990ed524dda836cd0251e2b | |
parent | c5fa7ff3ae4043c396e81087c0570fd16322341d (diff) | |
download | icing-f00787419db907033571b24a611de2ce3cca38c8.tar.gz |
Sync from upstream.
Descriptions:
======================================================================
Export Icing logging control to JNI
======================================================================
Prepare Icing logging class for JNI export
======================================================================
Export getDebugInfo to JNI
======================================================================
Expose the return_deleted_document_info parameter for deleteByQuery JNI
======================================================================
Enable runtime log control for Icing Library
======================================================================
Fix 1 ClangTidyBuild finding.
======================================================================
Update comments to run benchmarks.
======================================================================
Make Icing's own logging class
======================================================================
Convert the string lexicon debug information to a protocol buffer
======================================================================
Fix issue with printing fingerprinted key in our error logs.
======================================================================
Support dump function for IcingSearchEngine
======================================================================
Bug: 146903474
Bug: 229778472
Bug: 209071710
Bug: 222349894
Bug: 225914361
Change-Id: I70056cb261d54cfa869c6bf9b8251752bcfc0142
39 files changed, 1525 insertions, 296 deletions
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index 80a8011..d7ea4bb 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -33,7 +33,7 @@ // icing/file:portable-file-backed-proto-log_benchmark // // $ blaze-bin/icing/file/portable-file-backed-proto-log_benchmark -// --benchmarks=all +// --benchmark_filter=all // // // To build and run on an Android device (must be connected and rooted): @@ -48,7 +48,7 @@ // /data/local/tmp/ // // $ adb shell /data/local/tmp/portable-file-backed-proto-log-benchmark -// --benchmarks=all +// --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 952ba21..7ec354a 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -1374,6 +1374,45 @@ StorageInfoResultProto IcingSearchEngine::GetStorageInfo() { return result; } +DebugInfoResultProto IcingSearchEngine::GetDebugInfo(int verbosity) { + DebugInfoResultProto debug_info; + StatusProto* result_status = debug_info.mutable_status(); + absl_ports::shared_lock l(&mutex_); + if (!initialized_) { + debug_info.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION); + debug_info.mutable_status()->set_message( + "IcingSearchEngine has not been initialized!"); + return debug_info; + } + + // Index + *debug_info.mutable_debug_info()->mutable_index_info() = + index_->GetDebugInfo(verbosity); + + // Document Store + libtextclassifier3::StatusOr<DocumentDebugInfoProto> document_debug_info = + document_store_->GetDebugInfo(verbosity); + if (!document_debug_info.ok()) { + TransformStatus(document_debug_info.status(), result_status); + return debug_info; + } + *debug_info.mutable_debug_info()->mutable_document_info() = + std::move(document_debug_info).ValueOrDie(); + + // Schema Store + libtextclassifier3::StatusOr<SchemaDebugInfoProto> 
schema_debug_info = + schema_store_->GetDebugInfo(); + if (!schema_debug_info.ok()) { + TransformStatus(schema_debug_info.status(), result_status); + return debug_info; + } + *debug_info.mutable_debug_info()->mutable_schema_info() = + std::move(schema_debug_info).ValueOrDie(); + + result_status->set_code(StatusProto::OK); + return debug_info; +} + libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk( PersistType::Code persist_type) { if (persist_type == PersistType::LITE) { diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index ff9c7fb..cb6c281 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -403,6 +403,11 @@ class IcingSearchEngine { // that field will be set to -1. StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_); + // Get debug information for Icing. + // verbosity <= 0, simplest debug information + // verbosity > 0, more detailed debug information as indicated in debug.proto + DebugInfoResultProto GetDebugInfo(int verbosity) ICING_LOCKS_EXCLUDED(mutex_); + // Clears all data from Icing and re-initializes. Clients DO NOT need to call // Initialize again. 
// diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 5e610d5..a8d06d1 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -51,7 +51,7 @@ // //icing:icing-search-engine_benchmark // // $ blaze-bin/icing/icing-search-engine_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -61,7 +61,7 @@ // $ adb push blaze-bin/icing/icing-search-engine_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/icing-search-engine_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/icing-search-engine_benchmark --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/icing-search-engine_flush_benchmark.cc b/icing/icing-search-engine_flush_benchmark.cc index de8f550..04e83fe 100644 --- a/icing/icing-search-engine_flush_benchmark.cc +++ b/icing/icing-search-engine_flush_benchmark.cc @@ -48,7 +48,7 @@ // //icing:icing-search-engine_flush_benchmark // // $ blaze-bin/icing/icing-search-engine_flush_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -59,7 +59,7 @@ // /data/local/tmp/ // // $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark -// --benchmarks=all +// --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 7ed8885..29849ef 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -8539,6 +8539,82 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) { ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); } +TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityZeroSucceeds) { + IcingSearchEngine 
icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create a document. + DocumentProto document = CreateMessageDocument("namespace", "email"); + ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); + + DebugInfoResultProto result = icing.GetDebugInfo(/*verbosity=*/0); + EXPECT_THAT(result.status(), ProtoIsOk()); + + // Some sanity checks + DebugInfoProto debug_info = result.debug_info(); + EXPECT_THAT( + debug_info.index_info().lite_index_info().last_added_document_id(), + Eq(0)); + EXPECT_THAT( + debug_info.document_info().document_storage_info().num_alive_documents(), + Eq(1)); + EXPECT_THAT(debug_info.document_info().corpus_info(), + IsEmpty()); // because verbosity=0 + EXPECT_THAT(debug_info.schema_info().crc(), Gt(0)); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoVerbosityOneSucceedsWithCorpusInfo) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Create 4 documents. 
+ DocumentProto document1 = CreateMessageDocument("namespace1", "email/1"); + DocumentProto document2 = CreateMessageDocument("namespace1", "email/2"); + DocumentProto document3 = CreateMessageDocument("namespace2", "email/3"); + DocumentProto document4 = CreateMessageDocument("namespace2", "email/4"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + + DebugInfoResultProto result = icing.GetDebugInfo(/*verbosity=*/1); + EXPECT_THAT(result.status(), ProtoIsOk()); + + // Some sanity checks + DebugInfoProto debug_info = result.debug_info(); + EXPECT_THAT( + debug_info.index_info().lite_index_info().last_added_document_id(), + Eq(3)); + EXPECT_THAT( + debug_info.document_info().document_storage_info().num_alive_documents(), + Eq(4)); + EXPECT_THAT(debug_info.document_info().corpus_info(), SizeIs(2)); + EXPECT_THAT(debug_info.schema_info().crc(), Gt(0)); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoUninitialized) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + DebugInfoResultProto result = icing.GetDebugInfo(/*verbosity=*/1); + EXPECT_THAT(result.status(), ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoNoSchemaNoDocumentsSucceeds) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + DebugInfoResultProto result = icing.GetDebugInfo(/*verbosity=*/1); + ASSERT_THAT(result.status(), ProtoIsOk()); +} + +TEST_F(IcingSearchEngineTest, GetDebugInfoWithSchemaNoDocumentsSucceeds) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + DebugInfoResultProto result = 
icing.GetDebugInfo(/*verbosity=*/1); + ASSERT_THAT(result.status(), ProtoIsOk()); +} + #ifndef ICING_JNI_TEST // We skip this test case when we're running in a jni_test since the data files // will be stored in the android-instrumented storage location, rather than the diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 1aad7d0..68c592c 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -39,7 +39,7 @@ // //icing/index:index-processor_benchmark // // $ blaze-bin/icing/index/index-processor_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/tokenization:language-segmenter depend on @@ -55,7 +55,7 @@ // $ adb push blaze-bin/icing/index/index-processor_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/index-processor_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/index-processor_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 8355c01..7a80022 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -1414,7 +1414,11 @@ TEST_F(IndexTest, GetDebugInfo) { EXPECT_FALSE(out0.main_index_info().has_flash_index_storage_info()); EXPECT_THAT(out0.main_index_info().last_added_document_id(), Eq(kDocumentId1)); + EXPECT_THAT(out0.main_index_info().lexicon_info().node_info().num_leaves(), + Eq(3)); EXPECT_THAT(out0.lite_index_info().curr_size(), Eq(2)); + EXPECT_THAT(out0.lite_index_info().lexicon_info().node_info().num_leaves(), + Eq(2)); EXPECT_THAT(out0.lite_index_info().last_added_document_id(), Eq(kDocumentId2)); @@ -1431,15 +1435,18 @@ TEST_F(IndexTest, GetDebugInfo) { IndexDebugInfoProto out2 = index_->GetDebugInfo(/*verbosity=*/0); EXPECT_THAT(out2.lite_index_info().curr_size(), Eq(3)); + 
EXPECT_THAT(out2.lite_index_info().lexicon_info().node_info().num_leaves(), + Eq(3)); EXPECT_THAT(out2.lite_index_info().last_added_document_id(), Eq(kDocumentId3)); - // Merge into the man index. Debuug strings should change again. + // Merge into the man index. Debug strings should change again. ICING_ASSERT_OK(index_->Merge()); IndexDebugInfoProto out3 = index_->GetDebugInfo(/*verbosity=*/0); EXPECT_TRUE(out3.has_index_storage_info()); - EXPECT_THAT(out3.main_index_info().lexicon_info(), Not(IsEmpty())); + EXPECT_THAT(out3.main_index_info().lexicon_info().node_info().num_leaves(), + Eq(6)); EXPECT_THAT(out3.main_index_info().last_added_document_id(), Eq(kDocumentId3)); EXPECT_THAT(out3.lite_index_info().curr_size(), Eq(0)); @@ -1448,7 +1455,8 @@ TEST_F(IndexTest, GetDebugInfo) { Eq(kInvalidDocumentId)); EXPECT_THAT(out3.lite_index_info().searchable_end(), Eq(0)); EXPECT_THAT(out3.lite_index_info().index_crc(), Gt(0)); - EXPECT_THAT(out3.lite_index_info().lexicon_info(), Not(IsEmpty())); + EXPECT_THAT(out3.lite_index_info().lexicon_info().node_info().num_leaves(), + Eq(0)); } TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) { diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc index f975989..993c3b8 100644 --- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc +++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc @@ -35,7 +35,7 @@ namespace { // // $ // blaze-bin/icing/index/iterator/doc-hit-info-iterator_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // $ blaze build --config=android_arm64 -c opt --dynamic_mode=off @@ -47,7 +47,7 @@ namespace { // /data/local/tmp/ // // $ adb shell /data/local/tmp/doc-hit-info-iterator_benchmark -// --benchmarks=all +// --benchmark_filter=all // Functor to be used with std::generate to create a container of DocHitInfos. 
// DocHitInfos are generated starting at docid starting_docid and continuing at diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index a5c6baf..dd964df 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -399,7 +399,7 @@ IndexDebugInfoProto::LiteIndexDebugInfoProto LiteIndex::GetDebugInfo( res.set_last_added_document_id(header_->last_added_docid()); res.set_searchable_end(header_->searchable_end()); res.set_index_crc(ComputeChecksum().Get()); - lexicon_.GetDebugInfo(verbosity, res.mutable_lexicon_info()); + *res.mutable_lexicon_info() = lexicon_.GetDebugInfo(verbosity); return res; } diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index 2d6007b..35ed7ad 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -612,7 +612,7 @@ IndexDebugInfoProto::MainIndexDebugInfoProto MainIndex::GetDebugInfo( IndexDebugInfoProto::MainIndexDebugInfoProto res; // Lexicon. - main_lexicon_->GetDebugInfo(verbosity, res.mutable_lexicon_info()); + *res.mutable_lexicon_info() = main_lexicon_->GetDebugInfo(verbosity); res.set_last_added_document_id(last_added_document_id()); diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index 51d3423..f4c066c 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -15,6 +15,7 @@ #include <jni.h> #include <string> +#include <utility> #include "icing/jni/jni-cache.h" #include <google/protobuf/message_lite.h> @@ -29,6 +30,7 @@ #include "icing/proto/search.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/usage.pb.h" +#include "icing/util/logging.h" #include "icing/util/status-macros.h" namespace { @@ -340,7 +342,8 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( - JNIEnv* env, jclass clazz, jobject object, jbyteArray 
search_spec_bytes) { + JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes, + jboolean return_deleted_document_info) { icing::lib::IcingSearchEngine* icing = GetIcingSearchEnginePointer(env, object); @@ -350,7 +353,7 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( return nullptr; } icing::lib::DeleteByQueryResultProto delete_result_proto = - icing->DeleteByQuery(search_spec_proto); + icing->DeleteByQuery(search_spec_proto, return_deleted_document_info); return SerializeProtoToJniByteArray(env, delete_result_proto); } @@ -439,4 +442,29 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSearchSuggestions( return SerializeProtoToJniByteArray(env, suggestionResponse); } +JNIEXPORT jbyteArray JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeGetDebugInfo( + JNIEnv* env, jclass clazz, jobject object, jint verbosity) { + icing::lib::IcingSearchEngine* icing = + GetIcingSearchEnginePointer(env, object); + + icing::lib::DebugInfoResultProto debug_info_result_proto = + icing->GetDebugInfo(verbosity); + + return SerializeProtoToJniByteArray(env, debug_info_result_proto); +} + +JNIEXPORT jboolean JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeShouldLog( + JNIEnv* env, jclass clazz, jshort severity, jshort verbosity) { + return icing::lib::ShouldLog( + static_cast<icing::lib::LogSeverity::Code>(severity), verbosity); +} + +JNIEXPORT jboolean JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeSetLoggingLevel( + JNIEnv* env, jclass clazz, jshort priority, jshort verbosity) { + return icing::lib::SetLoggingLevel( + static_cast<icing::lib::LogSeverity::Code>(priority), verbosity); +} } // extern "C" diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc index 77876c4..d74b14f 100644 --- a/icing/legacy/index/icing-dynamic-trie.cc +++ b/icing/legacy/index/icing-dynamic-trie.cc @@ -73,6 +73,7 @@ #include <cstdint> #include <cstring> #include <memory> 
+#include <optional> #include <utility> #include "icing/legacy/core/icing-packed-pod.h" @@ -111,6 +112,26 @@ int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start, } return valid_nexts_length; } + +// Get property id from filename. +std::optional<uint32_t> GetPropertyIDFromFileName(const std::string &filename) { + size_t property_id_start_idx = filename.rfind('.'); + if (property_id_start_idx == std::string::npos) { + ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s", + filename.c_str()); + return std::nullopt; + } + ++property_id_start_idx; // skip dot + char *end; + uint32_t property_id = + strtol(filename.c_str() + property_id_start_idx, &end, 10); // NOLINT + if (!end || end != (filename.c_str() + filename.size())) { + ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s", + filename.c_str()); + return std::nullopt; + } + return property_id; +} } // namespace // Based on the bit field widths. @@ -323,7 +344,7 @@ class IcingDynamicTrie::IcingDynamicTrieStorage { uint32_t value_size() const { return hdr().value_size(); } - void FillDirtyPageStats(Stats *stats) const; + void FillDirtyPageStats(LexiconDebugInfoProto *stats) const; void inc_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() + 1); } @@ -962,10 +983,13 @@ uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::suffixes_left() const { } void IcingDynamicTrie::IcingDynamicTrieStorage::FillDirtyPageStats( - Stats *stats) const { - stats->dirty_pages_nodes = array_storage_[NODE].num_dirty_pages(); - stats->dirty_pages_nexts = array_storage_[NEXT].num_dirty_pages(); - stats->dirty_pages_suffixes = array_storage_[SUFFIX].num_dirty_pages(); + LexiconDebugInfoProto *stats) const { + stats->mutable_node_info()->set_dirty_pages( + array_storage_[NODE].num_dirty_pages()); + stats->mutable_next_info()->set_dirty_pages( + array_storage_[NEXT].num_dirty_pages()); + stats->mutable_suffix_info()->set_dirty_pages( + array_storage_[SUFFIX].num_dirty_pages()); } // 
Dumper. @@ -1251,19 +1275,8 @@ bool IcingDynamicTrie::InitPropertyBitmaps() { } for (size_t i = 0; i < files.size(); i++) { // Decode property id from filename. - size_t property_id_start_idx = files[i].rfind('.'); - if (property_id_start_idx == std::string::npos) { - ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s", - files[i].c_str()); - continue; - } - property_id_start_idx++; // skip dot - char *end; - uint32_t property_id = - strtol(files[i].c_str() + property_id_start_idx, &end, 10); // NOLINT - if (!end || end != (files[i].c_str() + files[i].size())) { - ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s", - files[i].c_str()); + std::optional<uint32_t> property_id = GetPropertyIDFromFileName(files[i]); + if (!property_id.has_value()) { continue; } std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap( @@ -1277,9 +1290,9 @@ bool IcingDynamicTrie::InitPropertyBitmaps() { } bitmap->Truncate(truncate_idx); if (property_id >= property_bitmaps_.size()) { - property_bitmaps_.resize(property_id + 1); + property_bitmaps_.resize(*property_id + 1); } - property_bitmaps_[property_id] = std::move(bitmap); + property_bitmaps_[*property_id] = std::move(bitmap); } deleted_bitmap_ = OpenAndInitBitmap( @@ -1367,19 +1380,24 @@ uint32_t IcingDynamicTrie::size() const { return storage_->hdr().num_keys(); } -void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats, +void IcingDynamicTrie::CollectStatsRecursive(const Node &node, + LexiconDebugInfoProto *stats, uint32_t depth) const { + LexiconDebugInfoProto::NodeInfo *node_info = stats->mutable_node_info(); + LexiconDebugInfoProto::NextInfo *next_info = stats->mutable_next_info(); + LexiconDebugInfoProto::SuffixInfo *suffix_info = stats->mutable_suffix_info(); if (node.is_leaf()) { - stats->num_leaves++; - stats->sum_depth += depth; - stats->max_depth = max(stats->max_depth, depth); + node_info->set_num_leaves(node_info->num_leaves() + 1); + 
node_info->set_sum_depth(node_info->sum_depth() + depth); + node_info->set_max_depth(max(node_info->max_depth(), depth)); const char *suffix = storage_->GetSuffix(node.next_index()); - stats->suffixes_used += strlen(suffix) + 1 + value_size(); - if (!suffix[0]) { - stats->null_suffixes++; + suffix_info->set_suffixes_used(suffix_info->suffixes_used() + + strlen(suffix) + 1 + value_size()); + if (suffix[0] == '\0') { + suffix_info->set_num_null_suffixes(suffix_info->num_null_suffixes() + 1); } } else { - stats->num_intermediates++; + node_info->set_num_intermediates(node_info->num_intermediates() + 1); uint32_t i = 0; for (; i < (1U << node.log2_num_children()); i++) { const Next &next = *storage_->GetNext(node.next_index(), i); @@ -1392,30 +1410,42 @@ void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats, if (i == 0) { ICING_LOG(FATAL) << "No valid node in 'next' array"; } - stats->sum_children += i; - stats->max_children = max(stats->max_children, i); + node_info->set_sum_children(node_info->sum_children() + i); + node_info->set_max_children(max(node_info->max_children(), i)); - stats->child_counts[i - 1]++; - stats->wasted[node.log2_num_children()] += - (1 << node.log2_num_children()) - i; - stats->total_wasted += (1 << node.log2_num_children()) - i; + if (next_info->child_counts_size() > 0) { + next_info->set_child_counts(i - 1, next_info->child_counts(i - 1) + 1); + } + uint32_t wasted = (1 << node.log2_num_children()) - i; + next_info->set_wasted(node.log2_num_children(), + next_info->wasted(node.log2_num_children()) + wasted); + next_info->set_total_wasted(next_info->total_wasted() + wasted); } } -void IcingDynamicTrie::CollectStats(Stats *stats) const { +void IcingDynamicTrie::CollectStats(LexiconDebugInfoProto *stats, + int verbosity) const { if (!is_initialized()) { ICING_LOG(FATAL) << "DynamicTrie not initialized"; } - memset(stats, 0, sizeof(*stats)); + LexiconDebugInfoProto::NodeInfo *node_info = stats->mutable_node_info(); + 
LexiconDebugInfoProto::NextInfo *next_info = stats->mutable_next_info(); + LexiconDebugInfoProto::SuffixInfo *suffix_info = stats->mutable_suffix_info(); + + if (verbosity > 0) { + next_info->mutable_child_counts()->Resize(kMaxNextArraySize, 0); + } + next_info->mutable_wasted()->Resize(kNumNextAllocationBuckets, 0); + next_info->mutable_num_free()->Resize(kNumNextAllocationBuckets, 0); - stats->num_keys = storage_->hdr().num_keys(); - stats->num_nodes = storage_->hdr().num_nodes(); - stats->max_nodes = storage_->hdr().max_nodes(); - stats->num_nexts = storage_->hdr().num_nexts(); - stats->max_nexts = storage_->hdr().max_nexts(); - stats->suffixes_size = storage_->hdr().suffixes_size(); - stats->max_suffixes_size = storage_->hdr().max_suffixes_size(); + stats->set_num_keys(storage_->hdr().num_keys()); + node_info->set_num_nodes(storage_->hdr().num_nodes()); + node_info->set_max_nodes(storage_->hdr().max_nodes()); + next_info->set_num_nexts(storage_->hdr().num_nexts()); + next_info->set_max_nexts(storage_->hdr().max_nexts()); + suffix_info->set_suffixes_capacity(storage_->hdr().suffixes_size()); + suffix_info->set_max_suffixes_capacity(storage_->hdr().max_suffixes_size()); // Stats collected from traversing the trie. if (!storage_->empty()) { @@ -1426,80 +1456,23 @@ void IcingDynamicTrie::CollectStats(Stats *stats) const { for (int i = 0; i < kNumNextAllocationBuckets; i++) { for (uint32_t cur = storage_->hdr().free_lists(i); cur != kInvalidNextIndex; cur = storage_->GetNext(cur, 0)->next_index()) { - stats->num_free[i]++; + next_info->set_num_free(i, next_info->num_free(i) + 1); } - stats->total_free += stats->num_free[i] * (1 << i); + next_info->set_total_free(next_info->total_free() + + next_info->num_free(i) * (1 << i)); } // Dirty page counts. 
storage_->FillDirtyPageStats(stats); -} - -std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const { - std::string ret; - IcingStringUtil::SStringAppendF( - &ret, 0, - "Keys %u " - "Nodes (%u/%u) %.3f%% " - "Nexts (%u/%u) %.3f%% " - "Suffixes (%u/%u) %.3f%%\n", - num_keys, num_nodes, max_nodes, - 100. * math_util::SafeDivide(num_nodes, max_nodes), num_nexts, max_nexts, - 100. * math_util::SafeDivide(num_nexts, max_nexts), suffixes_size, - max_suffixes_size, - 100. * math_util::SafeDivide(suffixes_size, max_suffixes_size)); - - if (verbosity > 0) { - for (int i = 0; i < kNumNextAllocationBuckets; i++) { - if (num_free[i] > 0) { - IcingStringUtil::SStringAppendF(&ret, 0, "Freelist@%d: %u\n", 1 << i, - num_free[i]); - } - } - IcingStringUtil::SStringAppendF( - &ret, 0, "Freelist total: %u/%u %.3f%%\n", total_free, num_nexts, - 100. * math_util::SafeDivide(total_free, num_nexts)); - for (int i = 0; i < 256; i++) { - if (child_counts[i] > 0) { - IcingStringUtil::SStringAppendF(&ret, 0, "Child count@%d: %u\n", i + 1, - child_counts[i]); - } - } - for (int i = 0; i < kNumNextAllocationBuckets; i++) { - IcingStringUtil::SStringAppendF(&ret, 0, "Wasted@%d: %u\n", 1 << i, - wasted[i]); - } - IcingStringUtil::SStringAppendF( - &ret, 0, - "Wasted total: %u\n" - "Num intermediates %u num leaves %u " - "suffixes used %u null %u\n" - "avg and max children for intermediates: %.3f, %u\n" - "avg and max depth for leaves: %.3f, %u\n" - "Total next frag: %.3f%%\n", - total_wasted, num_intermediates, num_leaves, suffixes_used, - null_suffixes, 1. * sum_children / num_intermediates, max_children, - 1. * sum_depth / num_leaves, max_depth, - 100. 
* math_util::SafeDivide((total_free + total_wasted), num_nexts)); - } - IcingStringUtil::SStringAppendF( - &ret, 0, "Memory usage: %zu/%zu bytes\n", - num_nodes * sizeof(Node) + num_nexts * sizeof(Next) + suffixes_size, - max_nodes * sizeof(Node) + max_nexts * sizeof(Next) + max_suffixes_size); - - IcingStringUtil::SStringAppendF( - &ret, 0, "Dirty pages: nodes %u/%.0f nexts %u/%.0f suffixes %u/%.0f\n", - dirty_pages_nodes, - math_util::SafeDivide(num_nodes * sizeof(Node) + getpagesize() - 1, - getpagesize()), - dirty_pages_nexts, - math_util::SafeDivide(num_nexts * sizeof(Next) + getpagesize() - 1, - getpagesize()), - dirty_pages_suffixes, - math_util::SafeDivide(suffixes_size + getpagesize() - 1, getpagesize())); - - return ret; + // Some helper calculations to provide better readability. + node_info->set_avg_children(math_util::SafeDivide( + node_info->sum_children(), node_info->num_intermediates())); + node_info->set_avg_depth( + math_util::SafeDivide(node_info->sum_depth(), node_info->num_leaves())); + next_info->set_total_frag(math_util::SafeDivide( + (next_info->total_free() + next_info->total_wasted()), + next_info->num_nexts())); } void IcingDynamicTrie::DumpTrie(std::ostream *pretty_print, @@ -2275,29 +2248,42 @@ std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key, return prefix_lengths; } -void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const { - Stats stats; - CollectStats(&stats); - out->append(stats.DumpStats(verbosity)); +LexiconDebugInfoProto IcingDynamicTrie::GetDebugInfo(int verbosity) const { + LexiconDebugInfoProto stats; + CollectStats(&stats, verbosity); + + if (verbosity <= 0) { + return stats; + } - // Property files. + // Property files summary. 
vector<std::string> files; if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(), &files)) { ICING_LOG(ERROR) << IcingStringUtil::StringPrintf( "Could not get files at prefix %s", property_bitmaps_prefix_.c_str()); - return; + return stats; } + LexiconDebugInfoProto::PropertyBitmapInfo *deleted_bitmap = + stats.add_property_bitmaps_info(); + deleted_bitmap->set_property_id(-1); + deleted_bitmap->set_file_size( + filesystem_->GetFileSize(deleted_bitmap_filename_.c_str())); for (size_t i = 0; i < files.size(); i++) { - IcingStringUtil::SStringAppendF( - out, 1000, "Prop file %s size %" PRIu64 "\n", - filesystem_->GetBasename(files[i].c_str()).c_str(), - filesystem_->GetFileSize(files[i].c_str())); + LexiconDebugInfoProto::PropertyBitmapInfo *info = + stats.add_property_bitmaps_info(); + std::optional<uint32_t> property_id = GetPropertyIDFromFileName(files[i]); + if (!property_id.has_value()) { + continue; + } + info->set_property_id(*property_id); + info->set_file_size(filesystem_->GetFileSize(files[i].c_str())); } - IcingStringUtil::SStringAppendF( - out, 1000, "Deleted file %s size %" PRIu64 "\n", - filesystem_->GetBasename(deleted_bitmap_filename_.c_str()).c_str(), - filesystem_->GetFileSize(deleted_bitmap_filename_.c_str())); + return stats; +} + +void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const { + *out = GetDebugInfo(verbosity).DebugString(); } double IcingDynamicTrie::min_free_fraction() const { diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h index 013b926..abb4f1a 100644 --- a/icing/legacy/index/icing-dynamic-trie.h +++ b/icing/legacy/index/icing-dynamic-trie.h @@ -47,6 +47,7 @@ #include "icing/legacy/index/icing-mmapper.h" #include "icing/legacy/index/icing-storage.h" #include "icing/legacy/index/proto/icing-dynamic-trie-header.pb.h" +#include "icing/proto/debug.pb.h" #include "icing/util/i18n-utils.h" #include "unicode/utf8.h" @@ -143,58 +144,6 @@ class 
IcingDynamicTrie : public IIcingStorage { static const uint32_t kNoCrc = 0; - struct Stats { - uint32_t num_keys; - - // Node stats - - uint32_t num_nodes; - uint32_t max_nodes; - // Count of intermediate nodes. - uint32_t num_intermediates; - // Total and maximum number of children of intermediate nodes. - uint32_t sum_children, max_children; - - // Count of leaf nodes. - uint32_t num_leaves; - // Total and maximum depth of leaf nodes. - uint32_t sum_depth, max_depth; - - // Next stats - - uint32_t num_nexts; - uint32_t max_nexts; - // Count of next arrays by size. - uint32_t child_counts[kMaxNextArraySize]; - // Wasted next array space per allocation bucket (in Nexts, not - // bytes). - uint32_t wasted[kNumNextAllocationBuckets]; - // Sum of wasted array. - uint32_t total_wasted; - - // Suffix stats - - uint32_t suffixes_size; - uint32_t max_suffixes_size; - // Bytes actually used by suffixes. - uint32_t suffixes_used; - // Number of suffixes that are just empty strings. - uint32_t null_suffixes; - - // Next free-list stats - uint32_t num_free[kNumNextAllocationBuckets]; - // Total Next nodes free (weighted sum of the above). - uint32_t total_free; - - // Dirty pages. - uint32_t dirty_pages_nodes; - uint32_t dirty_pages_nexts; - uint32_t dirty_pages_suffixes; - - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - std::string DumpStats(int verbosity) const; - }; - // Options when creating the trie. Maximums for the node/next/suffix // arrays must be specified in advance. struct Options { @@ -281,7 +230,7 @@ class IcingDynamicTrie : public IIcingStorage { uint32_t size() const; // Collecting stats. - void CollectStats(Stats *stats) const; + void CollectStats(LexiconDebugInfoProto *stats, int verbosity) const; // Gets all of the contents of the trie for debugging purposes. Note: this // stores the entire set of terms in memory. @@ -400,6 +349,10 @@ class IcingDynamicTrie : public IIcingStorage { // itself. 
If utf8 is true, does not cut key mid-utf8. std::vector<int> FindBranchingPrefixLengths(const char *key, bool utf8) const; + // Returns debug information for the dynamic trie. + // verbosity <= 0, simplest debug information + // verbosity > 0, more detailed debug information as indicated in debug.proto + LexiconDebugInfoProto GetDebugInfo(int verbosity) const; void GetDebugInfo(int verbosity, std::string *out) const override; double min_free_fraction() const; @@ -607,7 +560,7 @@ class IcingDynamicTrie : public IIcingStorage { static const uint32_t kInvalidSuffixIndex; // Stats helpers. - void CollectStatsRecursive(const Node &node, Stats *stats, + void CollectStatsRecursive(const Node &node, LexiconDebugInfoProto *stats, uint32_t depth = 0) const; // Helpers for Find and Insert. diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc index 193765b..d2cf48d 100644 --- a/icing/legacy/index/icing-dynamic-trie_test.cc +++ b/icing/legacy/index/icing-dynamic-trie_test.cc @@ -27,15 +27,21 @@ #include "gtest/gtest.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/proto/debug.pb.h" #include "icing/testing/tmp-directory.h" -using testing::ElementsAre; - namespace icing { namespace lib { namespace { +using ::testing::Each; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Gt; +using ::testing::IsEmpty; +using ::testing::UnorderedElementsAre; + constexpr std::string_view kKeys[] = { "", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg", }; @@ -56,9 +62,8 @@ class IcingDynamicTrieTest : public ::testing::Test { // Output trie stats to stderr. 
static void StatsDump(const IcingDynamicTrie& trie) { - IcingDynamicTrie::Stats stats; - trie.CollectStats(&stats); - DLOG(INFO) << "Stats:\n" << stats.DumpStats(true); + DLOG(INFO) << "Stats:\n" + << trie.GetDebugInfo(/*verbosity=*/1).DebugString(); } static void AddToTrie(IcingDynamicTrie* trie, uint32_t num_keys) { @@ -1133,5 +1138,189 @@ TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) { ASSERT_EQ(0, trie.property_bitmaps_.size()); } +TEST_F(IcingDynamicTrieTest, GetDebugInfo) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + IcingDynamicTrie::Options option; + ASSERT_TRUE(trie.CreateIfNotExist(option)); + ASSERT_TRUE(trie.Init()); + + uint32_t unused = 0; + ASSERT_TRUE(trie.Insert("", &unused)); + uint32_t val_idx; + ASSERT_TRUE(trie.Insert("ab", &unused, &val_idx, false)); + trie.SetProperty(val_idx, 0); + ASSERT_TRUE(trie.Insert("ac", &unused)); + ASSERT_TRUE(trie.Insert("abd", &unused)); + ASSERT_TRUE(trie.Insert("bac", &unused)); + ASSERT_TRUE(trie.Insert("bb", &unused)); + ASSERT_TRUE(trie.Insert("bacd", &unused)); + ASSERT_TRUE(trie.Insert("abbb", &unused)); + ASSERT_TRUE(trie.Insert("abcdefg", &unused)); + StatsDump(trie); + + LexiconDebugInfoProto info0 = trie.GetDebugInfo(/*verbosity=*/1); + EXPECT_EQ(info0.num_keys(), 9); + EXPECT_EQ(info0.node_info().num_nodes(), 15); + EXPECT_EQ(info0.node_info().max_nodes(), option.max_nodes); + EXPECT_EQ(info0.node_info().num_intermediates(), 6); + EXPECT_EQ(info0.node_info().sum_children(), 14); + EXPECT_EQ(info0.node_info().max_children(), 4); + EXPECT_EQ(info0.node_info().avg_children(), (float)14 / 6); + EXPECT_EQ(info0.node_info().num_leaves(), 9); + EXPECT_EQ(info0.node_info().sum_depth(), 25); + EXPECT_EQ(info0.node_info().max_depth(), 4); + EXPECT_EQ(info0.node_info().avg_depth(), (float)25 / 9); + + EXPECT_EQ(info0.next_info().num_nexts(), 17); + EXPECT_EQ(info0.next_info().max_nexts(), option.max_nexts); + uint32_t 
exp_child_counts[IcingDynamicTrie::kMaxNextArraySize] = {1, 3, 1, 1}; + EXPECT_THAT(info0.next_info().child_counts(), + ElementsAreArray(exp_child_counts)); + EXPECT_THAT(info0.next_info().wasted(), + ElementsAre(0, 0, 1, 0, 0, 0, 0, 0, 0)); + EXPECT_EQ(info0.next_info().total_wasted(), 1); + EXPECT_THAT(info0.next_info().num_free(), + ElementsAre(0, 1, 0, 0, 0, 0, 0, 0, 0)); + EXPECT_EQ(info0.next_info().total_free(), 2); + EXPECT_EQ(info0.next_info().total_frag(), (float)(2 + 1) / 17); + + EXPECT_THAT(info0.suffix_info().suffixes_capacity(), Gt(0)); + EXPECT_EQ(info0.suffix_info().max_suffixes_capacity(), + option.max_suffixes_size); + EXPECT_THAT(info0.suffix_info().suffixes_used(), Gt(0)); + EXPECT_EQ(info0.suffix_info().num_null_suffixes(), 7); + + EXPECT_EQ(info0.node_info().dirty_pages(), 1); + EXPECT_EQ(info0.next_info().dirty_pages(), 1); + EXPECT_EQ(info0.suffix_info().dirty_pages(), 1); + + std::vector<uint32_t> property_ids; + for (int i = 0; i < info0.property_bitmaps_info_size(); i++) { + property_ids.push_back(info0.property_bitmaps_info(i).property_id()); + } + EXPECT_THAT(property_ids, UnorderedElementsAre(-1, 0)); + + LexiconDebugInfoProto info1 = trie.GetDebugInfo(/*verbosity=*/0); + EXPECT_THAT(info1.next_info().child_counts(), IsEmpty()); + EXPECT_THAT(info1.property_bitmaps_info(), IsEmpty()); +} + +TEST_F(IcingDynamicTrieTest, GetDebugInfoForEmptyTrie) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + IcingDynamicTrie::Options option; + ASSERT_TRUE(trie.CreateIfNotExist(option)); + ASSERT_TRUE(trie.Init()); + + LexiconDebugInfoProto info = trie.GetDebugInfo(/*verbosity=*/1); + EXPECT_EQ(info.num_keys(), 0); + EXPECT_EQ(info.node_info().num_nodes(), 0); + EXPECT_EQ(info.node_info().max_nodes(), option.max_nodes); + EXPECT_EQ(info.node_info().num_intermediates(), 0); + EXPECT_EQ(info.node_info().sum_children(), 0); + EXPECT_EQ(info.node_info().max_children(), 0); + 
EXPECT_EQ(info.node_info().num_leaves(), 0); + EXPECT_EQ(info.node_info().sum_depth(), 0); + EXPECT_EQ(info.node_info().max_depth(), 0); + + EXPECT_EQ(info.next_info().num_nexts(), 0); + EXPECT_EQ(info.next_info().max_nexts(), option.max_nexts); + EXPECT_THAT(info.next_info().child_counts(), Each(0)); + EXPECT_THAT(info.next_info().wasted(), Each(0)); + EXPECT_EQ(info.next_info().total_wasted(), 0); + EXPECT_THAT(info.next_info().num_free(), Each(0)); + EXPECT_EQ(info.next_info().total_free(), 0); + + EXPECT_EQ(info.suffix_info().suffixes_capacity(), 0); + EXPECT_EQ(info.suffix_info().max_suffixes_capacity(), + option.max_suffixes_size); + EXPECT_EQ(info.suffix_info().suffixes_used(), 0); + EXPECT_EQ(info.suffix_info().num_null_suffixes(), 0); + + EXPECT_EQ(info.node_info().dirty_pages(), 0); + EXPECT_EQ(info.next_info().dirty_pages(), 0); + EXPECT_EQ(info.suffix_info().dirty_pages(), 0); +} + +TEST_F(IcingDynamicTrieTest, GetDebugInfoCorrectForWastedAndChildCounts) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t unused = 0; + ASSERT_TRUE(trie.Insert("", &unused)); + ASSERT_TRUE(trie.Insert("ab", &unused)); + PrintTrie(trie); + LexiconDebugInfoProto::NextInfo info = + trie.GetDebugInfo(/*verbosity=*/1).next_info(); + // Root has 2 children "" and "ab", which are leaves. No wasted. + EXPECT_THAT(info.wasted(), Each(0)); + uint32_t exp_child_counts1[IcingDynamicTrie::kMaxNextArraySize] = {0, 1}; + EXPECT_THAT(info.child_counts(), ElementsAreArray(exp_child_counts1)); + + ASSERT_TRUE(trie.Insert("ac", &unused)); + PrintTrie(trie); + info = trie.GetDebugInfo(/*verbosity=*/1).next_info(); + // Root has 2 children "" and "a". + // "a" has 2 children "b" and "c". + // No wasted. 
+ EXPECT_THAT(info.wasted(), Each(0)); + uint32_t exp_child_counts2[IcingDynamicTrie::kMaxNextArraySize] = {0, 2}; + EXPECT_THAT(info.child_counts(), ElementsAreArray(exp_child_counts2)); + + ASSERT_TRUE(trie.Insert("ad", &unused)); + PrintTrie(trie); + info = trie.GetDebugInfo(/*verbosity=*/1).next_info(); + // Root has 2 children "" and "a". + // "a" has 3 children "b", "c", and "d". + // 1 next wasted for "a", since 2^2 - 3 = 1 + EXPECT_THAT(info.wasted(), ElementsAre(0, 0, 1, 0, 0, 0, 0, 0, 0)); + uint32_t exp_child_counts3[IcingDynamicTrie::kMaxNextArraySize] = {0, 1, 1}; + EXPECT_THAT(info.child_counts(), ElementsAreArray(exp_child_counts3)); +} + +TEST_F(IcingDynamicTrieTest, GetDebugInfoCorrectForFreeList) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t unused = 0; + ASSERT_TRUE(trie.Insert("", &unused)); + ASSERT_TRUE(trie.Insert("ab", &unused)); + ASSERT_TRUE(trie.Insert("ac", &unused)); + // No next arrays are freed yet. + LexiconDebugInfoProto::NextInfo info = + trie.GetDebugInfo(/*verbosity=*/0).next_info(); + EXPECT_THAT(info.num_free(), Each(0)); + EXPECT_EQ(info.total_free(), 0); + + ASSERT_TRUE(trie.Insert("ad", &unused)); + info = trie.GetDebugInfo(/*verbosity=*/0).next_info(); + // The next array of "a" with size 2 has been freed in order to expand to + // size 4. 
+ EXPECT_THAT(info.num_free(), ElementsAre(0, 1, 0, 0, 0, 0, 0, 0, 0)); + EXPECT_EQ(info.total_free(), 2); + + ASSERT_TRUE(trie.Insert("ae", &unused)); + info = trie.GetDebugInfo(/*verbosity=*/0).next_info(); + // No change + EXPECT_THAT(info.num_free(), ElementsAre(0, 1, 0, 0, 0, 0, 0, 0, 0)); + EXPECT_EQ(info.total_free(), 2); + + ASSERT_TRUE(trie.Insert("af", &unused)); + info = trie.GetDebugInfo(/*verbosity=*/0).next_info(); + // The next array of "a" with size 4 has been freed in order to expand to + // size 8. + EXPECT_THAT(info.num_free(), ElementsAre(0, 1, 1, 0, 0, 0, 0, 0, 0)); + EXPECT_EQ(info.total_free(), 2 + 4); +} + } // namespace lib } // namespace icing diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index e48fe78..b505ac5 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -37,7 +37,7 @@ // //icing/query:query-processor_benchmark // // $ blaze-bin/icing/query/query-processor_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/tokenization:language-segmenter depend on @@ -53,7 +53,7 @@ // $ adb push blaze-bin/icing/query/query-processor_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/query-processor_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff --git a/icing/scoring/ranker_benchmark.cc b/icing/scoring/ranker_benchmark.cc index 8983dd9..c2f13de 100644 --- a/icing/scoring/ranker_benchmark.cc +++ b/icing/scoring/ranker_benchmark.cc @@ -27,7 +27,7 @@ namespace { // $ blaze build -c opt --dynamic_mode=off --copt=-gmlt // //icing/scoring:ranker_benchmark // -// $ blaze-bin/icing/scoring/ranker_benchmark --benchmarks=all +// $ blaze-bin/icing/scoring/ranker_benchmark --benchmark_filter=all // --benchmark_memory_usage // // Run on 
an Android device: @@ -38,7 +38,7 @@ namespace { // $ adb push blaze-bin/icing/scoring/ranker_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/ranker_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/ranker_benchmark --benchmark_filter=all void BM_GetTopN(benchmark::State& state) { int num_to_score = state.range(0); diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc index cc1d995..44dda3c 100644 --- a/icing/scoring/score-and-rank_benchmark.cc +++ b/icing/scoring/score-and-rank_benchmark.cc @@ -49,7 +49,7 @@ // //icing/scoring:score-and-rank_benchmark // // $ blaze-bin/icing/scoring/score-and-rank_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -59,7 +59,7 @@ // $ adb push blaze-bin/icing/scoring/score-and-rank_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/score-and-rank_benchmark --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc index 5e23a8e..1739a50 100644 --- a/icing/store/document-log-creator.cc +++ b/icing/store/document-log-creator.cc @@ -18,7 +18,6 @@ #include <string> #include <utility> -#include "icing/text_classifier/lib3/utils/base/logging.h" #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/annotate.h" diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 8c8369c..7a1db7a 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -53,6 +53,7 @@ #include "icing/util/clock.h" #include "icing/util/crc32.h" #include "icing/util/data-loss.h" +#include "icing/util/fingerprint-util.h" #include 
"icing/util/logging.h" #include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" @@ -125,22 +126,13 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) { // overhead per key. As we know that these fingerprints are always 8-bytes in // length and that they're random, we might be able to store them more // compactly. -std::string MakeFingerprint(std::string_view name_space, std::string_view uri) { +std::string MakeFingerprint(std::string_view field1, std::string_view field2) { // Using a 64-bit fingerprint to represent the key could lead to collisions. // But, even with 200K unique keys, the probability of collision is about // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack). uint64_t fprint = - tc3farmhash::Fingerprint64(absl_ports::StrCat(name_space, uri)); - - std::string encoded_fprint; - // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in - // base128 and add 1 to make sure that no byte is '0'. This increases the - // size of the encoded_fprint from 8-bytes to 10-bytes. - while (fprint) { - encoded_fprint.push_back((fprint & 0x7F) + 1); - fprint >>= 7; - } - return encoded_fprint; + tc3farmhash::Fingerprint64(absl_ports::StrCat(field1, field2)); + return fingerprint_util::GetFingerprintString(fprint); } int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms, @@ -349,7 +341,8 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. 
auto document_key_mapper_or = - KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize); + KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>:: + Create(*filesystem_, base_dir_, kUriMapperMaxSize); if (!document_key_mapper_or.ok()) { ICING_LOG(ERROR) << document_key_mapper_or.status().error_message() << "Failed to initialize KeyMapper"; @@ -389,10 +382,14 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { usage_store_, UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_))); - ICING_ASSIGN_OR_RETURN(corpus_mapper_, - KeyMapper<CorpusId>::Create( - *filesystem_, MakeCorpusMapperFilename(base_dir_), - kCorpusMapperMaxSize)); + auto corpus_mapper_or = + KeyMapper<CorpusId, fingerprint_util::FingerprintStringFormatter>::Create( + *filesystem_, MakeCorpusMapperFilename(base_dir_), + kCorpusMapperMaxSize); + if (!corpus_mapper_or.ok()) { + return std::move(corpus_mapper_or).status(); + } + corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie(); ICING_ASSIGN_OR_RETURN(corpus_score_cache_, FileBackedVector<CorpusAssociatedScoreData>::Create( @@ -571,7 +568,8 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. 
auto document_key_mapper_or = - KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize); + KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>:: + Create(*filesystem_, base_dir_, kUriMapperMaxSize); if (!document_key_mapper_or.ok()) { ICING_LOG(ERROR) << document_key_mapper_or.status().error_message() << "Failed to re-init key mapper"; @@ -675,10 +673,14 @@ libtextclassifier3::Status DocumentStore::ResetCorpusMapper() { << "Failed to delete old corpus_id mapper"; return status; } - ICING_ASSIGN_OR_RETURN(corpus_mapper_, - KeyMapper<CorpusId>::Create( - *filesystem_, MakeCorpusMapperFilename(base_dir_), - kCorpusMapperMaxSize)); + auto corpus_mapper_or = + KeyMapper<CorpusId, fingerprint_util::FingerprintStringFormatter>::Create( + *filesystem_, MakeCorpusMapperFilename(base_dir_), + kCorpusMapperMaxSize); + if (!corpus_mapper_or.ok()) { + return std::move(corpus_mapper_or).status(); + } + corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie(); return libtextclassifier3::Status::OK; } diff --git a/icing/store/document-store.h b/icing/store/document-store.h index e6d2e5c..7b7da1f 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -48,6 +48,7 @@ #include "icing/util/crc32.h" #include "icing/util/data-loss.h" #include "icing/util/document-validator.h" +#include "icing/util/fingerprint-util.h" namespace icing { namespace lib { @@ -455,7 +456,9 @@ class DocumentStore { std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_; // Key (namespace + uri) to DocumentId mapping - std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_; + std::unique_ptr< + KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>> + document_key_mapper_; // DocumentId to file offset mapping std::unique_ptr<FileBackedVector<int64_t>> document_id_mapper_; @@ -491,7 +494,9 @@ class DocumentStore { // unique id. 
A coprus is assigned an // id when the first document belonging to that corpus is added to the // DocumentStore. Corpus ids may be removed from the mapper during compaction. - std::unique_ptr<KeyMapper<CorpusId>> corpus_mapper_; + std::unique_ptr< + KeyMapper<CorpusId, fingerprint_util::FingerprintStringFormatter>> + corpus_mapper_; // A storage class that caches all usage scores. Usage scores are not // considered as ground truth. Usage scores are associated with document ids diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index fc3fd9d..04776cf 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -46,7 +46,7 @@ // //icing/store:document-store_benchmark // // $ blaze-bin/icing/store/document-store_benchmark -// --benchmarks=all --benchmark_memory_usage +// --benchmark_filter=all --benchmark_memory_usage // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -57,7 +57,7 @@ // /data/local/tmp/ // // $ adb shell /data/local/tmp/document-store_benchmark -// --benchmarks=all +// --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 96d11bf..0f0eb62 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -485,6 +485,35 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } +TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + // Validates that deleting something non-existing won't append anything to + // ground truth + int64_t 
document_log_size_before = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + + libtextclassifier3::Status status = + document_store->Delete("android$contacts/", "661"); + EXPECT_THAT(status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + for (char c : status.error_message()) { + EXPECT_THAT(std::isprint(c), IsTrue()); + } + + int64_t document_log_size_after = filesystem_.GetFileSize( + absl_ports::StrCat(document_store_dir_, "/", + DocumentLogCreator::GetDocumentLogFilename()) + .c_str()); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); +} + TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, diff --git a/icing/store/key-mapper.h b/icing/store/key-mapper.h index 23c7b69..7a1368a 100644 --- a/icing/store/key-mapper.h +++ b/icing/store/key-mapper.h @@ -26,6 +26,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" #include "icing/file/filesystem.h" #include "icing/legacy/index/icing-dynamic-trie.h" #include "icing/legacy/index/icing-filesystem.h" @@ -39,7 +40,7 @@ namespace lib { // type. // // KeyMapper is thread-compatible -template <typename T> +template <typename T, typename Formatter = absl_ports::DefaultFormatter> class KeyMapper { public: // Returns an initialized instance of KeyMapper that can immediately handle @@ -51,9 +52,9 @@ class KeyMapper { // KeyMapper, then this existing data would be loaded. Otherwise, // an empty KeyMapper would be created. // maximum_size_bytes : The maximum allowable size of the key mapper storage. 
- static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create( - const Filesystem& filesystem, std::string_view base_dir, - int maximum_size_bytes); + static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T, Formatter>>> + Create(const Filesystem& filesystem, std::string_view base_dir, + int maximum_size_bytes); // Deletes all the files associated with the KeyMapper. Returns success or any // encountered IO errors @@ -153,10 +154,11 @@ class KeyMapper { "T must be trivially copyable"); }; -template <typename T> -libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> -KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir, - int maximum_size_bytes) { +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T, Formatter>>> +KeyMapper<T, Formatter>::Create(const Filesystem& filesystem, + std::string_view base_dir, + int maximum_size_bytes) { // We create a subdirectory since the trie creates and stores multiple files. // This makes it easier to isolate the trie files away from other files that // could potentially be in the same base_dir, and makes it easier to delete. 
@@ -166,14 +168,15 @@ KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir, return absl_ports::InternalError(absl_ports::StrCat( "Failed to create KeyMapper directory: ", key_mapper_dir)); } - auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir)); + auto mapper = std::unique_ptr<KeyMapper<T, Formatter>>( + new KeyMapper<T, Formatter>(key_mapper_dir)); ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes)); return mapper; } -template <typename T> -libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem, - std::string_view base_dir) { +template <typename T, typename Formatter> +libtextclassifier3::Status KeyMapper<T, Formatter>::Delete( + const Filesystem& filesystem, std::string_view base_dir) { std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir); if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) { return absl_ports::InternalError(absl_ports::StrCat( @@ -182,16 +185,17 @@ libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem, return libtextclassifier3::Status::OK; } -template <typename T> -KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir) +template <typename T, typename Formatter> +KeyMapper<T, Formatter>::KeyMapper(std::string_view key_mapper_dir) : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)), trie_(file_prefix_, IcingDynamicTrie::RuntimeOptions().set_storage_policy( IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc), &icing_filesystem_) {} -template <typename T> -libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) { +template <typename T, typename Formatter> +libtextclassifier3::Status KeyMapper<T, Formatter>::Initialize( + int maximum_size_bytes) { IcingDynamicTrie::Options options; // Divide the max space between the three internal arrays: nodes, nexts and // suffixes. MaxNodes and MaxNexts are in units of their own data structures. 
@@ -213,15 +217,16 @@ libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) { return libtextclassifier3::Status::OK; } -template <typename T> -libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key, - T next_value) { +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<T> KeyMapper<T, Formatter>::GetOrPut( + std::string_view key, T next_value) { std::string string_key(key); uint32_t value_index; if (!trie_.Insert(string_key.c_str(), &next_value, &value_index, /*replace=*/false)) { - return absl_ports::InternalError(absl_ports::StrCat( - "Unable to insert key ", key, " into KeyMapper ", file_prefix_, ".")); + return absl_ports::InternalError( + absl_ports::StrCat("Unable to insert key ", Formatter()(string_key), + " into KeyMapper ", file_prefix_, ".")); } // This memory address could be unaligned since we're just grabbing the value // from somewhere in the trie's suffix array. The suffix array is filled with @@ -236,34 +241,39 @@ libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key, return aligned_value; } -template <typename T> -libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) { +template <typename T, typename Formatter> +libtextclassifier3::Status KeyMapper<T, Formatter>::Put(std::string_view key, + T value) { std::string string_key(key); if (!trie_.Insert(string_key.c_str(), &value)) { - return absl_ports::InternalError(absl_ports::StrCat( - "Unable to insert key ", key, " into KeyMapper ", file_prefix_, ".")); + return absl_ports::InternalError( + absl_ports::StrCat("Unable to insert key ", Formatter()(string_key), + " into KeyMapper ", file_prefix_, ".")); } return libtextclassifier3::Status::OK; } -template <typename T> -libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const { +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<T> KeyMapper<T, Formatter>::Get( + std::string_view key) const { std::string 
string_key(key); T value; if (!trie_.Find(string_key.c_str(), &value)) { - return absl_ports::NotFoundError(absl_ports::StrCat( - "Key not found ", key, " in KeyMapper ", file_prefix_, ".")); + return absl_ports::NotFoundError( + absl_ports::StrCat("Key not found ", Formatter()(string_key), + " in KeyMapper ", file_prefix_, ".")); } return value; } -template <typename T> -bool KeyMapper<T>::Delete(std::string_view key) { +template <typename T, typename Formatter> +bool KeyMapper<T, Formatter>::Delete(std::string_view key) { return trie_.Delete(key); } -template <typename T> -std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const { +template <typename T, typename Formatter> +std::unordered_map<T, std::string> KeyMapper<T, Formatter>::GetValuesToKeys() + const { std::unordered_map<T, std::string> values_to_keys; for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid(); itr.Advance()) { @@ -277,8 +287,8 @@ std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const { return values_to_keys; } -template <typename T> -libtextclassifier3::Status KeyMapper<T>::PersistToDisk() { +template <typename T, typename Formatter> +libtextclassifier3::Status KeyMapper<T, Formatter>::PersistToDisk() { if (!trie_.Sync()) { return absl_ports::InternalError( absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_)); @@ -287,8 +297,9 @@ libtextclassifier3::Status KeyMapper<T>::PersistToDisk() { return libtextclassifier3::Status::OK; } -template <typename T> -libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const { +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<int64_t> KeyMapper<T, Formatter>::GetDiskUsage() + const { int64_t size = trie_.GetDiskUsage(); if (size == IcingFilesystem::kBadFileSize || size < 0) { return absl_ports::InternalError("Failed to get disk usage of key mapper"); @@ -296,8 +307,9 @@ libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const { return 
size; } -template <typename T> -libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const { +template <typename T, typename Formatter> +libtextclassifier3::StatusOr<int64_t> KeyMapper<T, Formatter>::GetElementsSize() + const { int64_t size = trie_.GetElementsSize(); if (size == IcingFilesystem::kBadFileSize || size < 0) { return absl_ports::InternalError( @@ -306,8 +318,8 @@ libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const { return size; } -template <typename T> -Crc32 KeyMapper<T>::ComputeChecksum() { +template <typename T, typename Formatter> +Crc32 KeyMapper<T, Formatter>::ComputeChecksum() { return Crc32(trie_.UpdateCrc()); } diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc index 6f7d4df..748a322 100644 --- a/icing/tokenization/language-segmenter_benchmark.cc +++ b/icing/tokenization/language-segmenter_benchmark.cc @@ -27,7 +27,7 @@ // //icing/tokenization:language-segmenter_benchmark // // $ blaze-bin/icing/tokenization/language-segmenter_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // Make target //icing/tokenization:language-segmenter depend on @@ -41,7 +41,7 @@ // blaze-bin/icing/tokenization/language-segmenter_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc index fdd4c70..fe8289a 100644 --- a/icing/transform/icu/icu-normalizer_benchmark.cc +++ b/icing/transform/icu/icu-normalizer_benchmark.cc @@ -25,7 +25,7 @@ // //icing/transform/icu:icu-normalizer_benchmark // // $ blaze-bin/icing/transform/icu/icu-normalizer_benchmark -// --benchmarks=all +// 
--benchmark_filter=all // // Run on an Android device: // Make target //icing/transform:normalizer depend on @@ -39,7 +39,7 @@ // blaze-bin/icing/transform/icu/icu-normalizer_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/icu-normalizer_benchmark --benchmark_filter=all // --adb // Flag to tell the benchmark that it'll be run on an Android device via adb, diff --git a/icing/transform/map/map-normalizer_benchmark.cc b/icing/transform/map/map-normalizer_benchmark.cc index 8268541..4560329 100644 --- a/icing/transform/map/map-normalizer_benchmark.cc +++ b/icing/transform/map/map-normalizer_benchmark.cc @@ -24,7 +24,7 @@ // //icing/transform/map:map-normalizer_benchmark // // $ blaze-bin/icing/transform/map/map-normalizer_benchmark -// --benchmarks=all +// --benchmark_filter=all // // Run on an Android device: // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" @@ -35,7 +35,7 @@ // blaze-bin/icing/transform/map/map-normalizer_benchmark // /data/local/tmp/ // -// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmarks=all +// $ adb shell /data/local/tmp/map-normalizer_benchmark --benchmark_filter=all namespace icing { namespace lib { diff --git a/icing/util/fingerprint-util.cc b/icing/util/fingerprint-util.cc new file mode 100644 index 0000000..0ea843f --- /dev/null +++ b/icing/util/fingerprint-util.cc @@ -0,0 +1,48 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/fingerprint-util.h" + +namespace icing { +namespace lib { + +namespace fingerprint_util { + +// Converts from a fingerprint to a fingerprint string. +std::string GetFingerprintString(uint64_t fingerprint) { + std::string encoded_fprint; + // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in + // base128 and add 1 to make sure that no byte is '0'. This increases the + // size of the encoded_fprint from 8-bytes to 10-bytes. + while (fingerprint) { + encoded_fprint.push_back((fingerprint & 0x7F) + 1); + fingerprint >>= 7; + } + return encoded_fprint; +} + +uint64_t GetFingerprint(std::string_view fingerprint_string) { + uint64_t fprint = 0; + for (int i = fingerprint_string.length() - 1; i >= 0; --i) { + fprint <<= 7; + char c = fingerprint_string[i] - 1; + fprint |= (c & 0x7F); + } + return fprint; +} + +} // namespace fingerprint_util + +} // namespace lib +} // namespace icing diff --git a/icing/util/fingerprint-util.h b/icing/util/fingerprint-util.h new file mode 100644 index 0000000..9e98617 --- /dev/null +++ b/icing/util/fingerprint-util.h @@ -0,0 +1,47 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ +#ifndef ICING_UTIL_FINGERPRINT_UTIL_H_ +#define ICING_UTIL_FINGERPRINT_UTIL_H_ + +#include <cstdint> +#include <string> +#include <string_view> + +namespace icing { +namespace lib { + +namespace fingerprint_util { + +// Converts from a fingerprint to a fingerprint string. +std::string GetFingerprintString(uint64_t fingerprint); + +// Converts from a fingerprint string to a fingerprint. +uint64_t GetFingerprint(std::string_view fingerprint_string); + +// A formatter to properly handle a string that is actually just a hash value. +class FingerprintStringFormatter { + public: + std::string operator()(std::string_view fingerprint_string) { + uint64_t fingerprint = GetFingerprint(fingerprint_string); + return std::to_string(fingerprint); + } +}; + +} // namespace fingerprint_util + +} // namespace lib +} // namespace icing + +#endif // ICING_UTIL_FINGERPRINT_UTIL_H_ diff --git a/icing/util/fingerprint-util_test.cc b/icing/util/fingerprint-util_test.cc new file mode 100644 index 0000000..948c75a --- /dev/null +++ b/icing/util/fingerprint-util_test.cc @@ -0,0 +1,75 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/util/fingerprint-util.h" + +#include <cstdint> +#include <limits> + +#include "icing/text_classifier/lib3/utils/hash/farmhash.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace icing { +namespace lib { +namespace fingerprint_util { + +namespace { + +using ::testing::Eq; + +TEST(FingerprintUtilTest, ConversionIsReversible) { + std::string str = "foo-bar-baz"; + uint64_t fprint = tc3farmhash::Fingerprint64(str); + std::string fprint_string = GetFingerprintString(fprint); + EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint)); +} + +TEST(FingerprintUtilTest, ZeroConversionIsReversible) { + uint64_t fprint = 0; + std::string fprint_string = GetFingerprintString(fprint); + EXPECT_THAT(GetFingerprint(fprint_string), Eq(fprint)); +} + +TEST(FingerprintUtilTest, MultipleConversionsAreReversible) { + EXPECT_THAT(GetFingerprint(GetFingerprintString(25)), Eq(25)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(766)), Eq(766)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(2305)), Eq(2305)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(6922)), Eq(6922)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(62326)), Eq(62326)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(186985)), Eq(186985)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(560962)), Eq(560962)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(1682893)), Eq(1682893)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(15146065)), Eq(15146065)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(136314613)), Eq(136314613)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(1226831545)), Eq(1226831545)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(11041483933)), + Eq(11041483933)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(2683080596566)), + Eq(2683080596566)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(72443176107373)), + Eq(72443176107373)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(1955965754899162)), + Eq(1955965754899162)); + 
EXPECT_THAT(GetFingerprint(GetFingerprintString(52811075382277465)), + Eq(52811075382277465)); + EXPECT_THAT(GetFingerprint(GetFingerprintString(4277697105964474945)), + Eq(4277697105964474945)); +} + +} // namespace + +} // namespace fingerprint_util +} // namespace lib +} // namespace icing diff --git a/icing/util/logging.cc b/icing/util/logging.cc new file mode 100644 index 0000000..7f9c70d --- /dev/null +++ b/icing/util/logging.cc @@ -0,0 +1,122 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/logging.h" + +#include <atomic> +#include <exception> +#include <string_view> + +#include "icing/util/logging_raw.h" + +namespace icing { +namespace lib { +namespace { +// Returns pointer to beginning of last /-separated token from file_name. +// file_name should be a pointer to a zero-terminated array of chars. +// E.g., "foo/bar.cc" -> "bar.cc", "foo/" -> "", "foo" -> "foo". +const char *JumpToBasename(const char *file_name) { + if (file_name == nullptr) { + return nullptr; + } + + // Points to the beginning of the last encountered token. + size_t last_token_start = std::string_view(file_name).find_last_of('/'); + if (last_token_start == std::string_view::npos) { + return file_name; + } + return file_name + last_token_start + 1; +} + +// Calculate the logging level value based on priority and verbosity. 
+constexpr uint32_t CalculateLoggingLevel(LogSeverity::Code priority, + uint16_t verbosity) { + uint32_t logging_level = static_cast<uint16_t>(priority); + logging_level = (logging_level << 16) | verbosity; + return logging_level; +} + +#if defined(ICING_DEBUG_LOGGING) +#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::VERBOSE, 1) +#else +// TODO(b/146903474) Audit our use of INFO and WARNING logs so that they are not +// overused, and then change the default logging level here to INFO. +#define DEFAULT_LOGGING_LEVEL CalculateLoggingLevel(LogSeverity::ERROR, 0) +#endif + +// The current global logging level for Icing, which controls which logs are +// printed based on priority and verbosity. +// +// This needs to be global so that it can be easily accessed from ICING_LOG and +// ICING_VLOG macros spread throughout the entire code base. +// +// The first 16 bits represent the minimal log priority. +// The last 16 bits represent the current verbosity. +std::atomic<uint32_t> global_logging_level = DEFAULT_LOGGING_LEVEL; + +} // namespace + +// Whether we should log according to the current logging level. +bool ShouldLog(LogSeverity::Code severity, int16_t verbosity) { + if (verbosity < 0) { + return false; + } + // Using the relaxed order for better performance because we only need to + // guarantee the atomicity for this specific statement, without the need to + // worry about reordering. + uint32_t curr_logging_level = + global_logging_level.load(std::memory_order_relaxed); + // If severity is less than the threshold set. + if (static_cast<uint16_t>(severity) < (curr_logging_level >> 16)) { + return false; + } + if (severity == LogSeverity::VERBOSE) { + // return whether the verbosity is within the current verbose level set. 
+ return verbosity <= (curr_logging_level & 0xffff); + } + return true; +} + +bool SetLoggingLevel(LogSeverity::Code priority, int16_t verbosity) { + if (verbosity < 0) { + return false; + } + if (priority > LogSeverity::VERBOSE && verbosity > 0) { + return false; + } + // Using the relaxed order for better performance because we only need to + // guarantee the atomicity for this specific statement, without the need to + // worry about reordering. + global_logging_level.store(CalculateLoggingLevel(priority, verbosity), + std::memory_order_relaxed); + return true; +} + +LogMessage::LogMessage(LogSeverity::Code severity, uint16_t verbosity, + std::string_view tag, const char *file_name, + int line_number) + : severity_(severity), verbosity_(verbosity), tag_(tag) { + stream_ << JumpToBasename(file_name) << ":" << line_number << ": "; +} + +LogMessage::~LogMessage() { + if (ShouldLog(severity_, verbosity_)) { + LowLevelLogging(severity_, tag_, stream_.message); + } + if (severity_ == LogSeverity::FATAL) { + std::terminate(); // Will print a stacktrace (stdout or logcat). + } +} +} // namespace lib +} // namespace icing diff --git a/icing/util/logging.h b/icing/util/logging.h index 9d598fe..cbe1102 100644 --- a/icing/util/logging.h +++ b/icing/util/logging.h @@ -15,14 +15,115 @@ #ifndef ICING_UTIL_LOGGING_H_ #define ICING_UTIL_LOGGING_H_ -#include "icing/text_classifier/lib3/utils/base/logging.h" +#include <atomic> +#include <cstdint> +#include <string> +#include "icing/proto/debug.pb.h" + +// This header provides base/logging.h style macros, ICING_LOG and ICING_VLOG, +// for logging in various platforms. The macros use __android_log_write on +// Android, and log to stdout/stderr on others. It also provides a function +// SetLoggingLevel to control the log severity level for ICING_LOG and verbosity +// for ICING_VLOG. 
namespace icing { namespace lib { -// TODO(b/146903474) Add verbose level control -#define ICING_VLOG(verbose_level) TC3_VLOG(verbose_level) -#define ICING_LOG(severity) TC3_LOG(severity) +// Whether we should log according to the current logging level. +// The function will always return false when verbosity is negative. +bool ShouldLog(LogSeverity::Code severity, int16_t verbosity = 0); + +// Set the minimal logging priority to be enabled, and the verbose level to see +// from the logs. +// Return false if priority is set higher than VERBOSE but verbosity is not 0. +// The function will always return false when verbosity is negative. +bool SetLoggingLevel(LogSeverity::Code priority, int16_t verbosity = 0); + +// A tiny code footprint string stream for assembling log messages. +struct LoggingStringStream { + LoggingStringStream& stream() { return *this; } + + std::string message; +}; + +template <typename T> +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const T& entry) { + stream.message.append(std::to_string(entry)); + return stream; +} + +template <typename T> +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + T* const entry) { + stream.message.append( + std::to_string(reinterpret_cast<const uint64_t>(entry))); + return stream; +} + +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const char* message) { + stream.message.append(message); + return stream; +} + +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const std::string& message) { + stream.message.append(message); + return stream; +} + +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + std::string_view message) { + stream.message.append(message); + return stream; +} + +template <typename T1, typename T2> +inline LoggingStringStream& operator<<(LoggingStringStream& stream, + const std::pair<T1, T2>& entry) { + stream << "(" << entry.first << ", " << entry.second << ")"; + return stream; +} + +// The 
class that does all the work behind our ICING_LOG(severity) macros. Each +// ICING_LOG(severity) << obj1 << obj2 << ...; logging statement creates a +// LogMessage temporary object containing a stringstream. Each operator<< adds +// info to that stringstream and the LogMessage destructor performs the actual +// logging. The reason this works is that in C++, "all temporary objects are +// destroyed as the last step in evaluating the full-expression that (lexically) +// contains the point where they were created." For more info, see +// http://en.cppreference.com/w/cpp/language/lifetime. Hence, the destructor is +// invoked after the last << from that logging statement. +class LogMessage { + public: + LogMessage(LogSeverity::Code severity, uint16_t verbosity, + std::string_view tag, const char* file_name, int line_number) + __attribute__((noinline)); + + ~LogMessage() __attribute__((noinline)); + + // Returns the stream associated with the logger object. + LoggingStringStream& stream() { return stream_; } + + private: + const LogSeverity::Code severity_; + const uint16_t verbosity_; + const std::string tag_; + + // Stream that "prints" all info into a string (not to a file). We construct + // here the entire logging message and next print it in one operation. 
+ LoggingStringStream stream_; +}; + +#define ICING_VLOG(verbose_level) \ + ::icing::lib::LogMessage(::icing::lib::LogSeverity::VERBOSE, verbose_level, \ + "icing", __FILE__, __LINE__) \ + .stream() +#define ICING_LOG(severity) \ + ::icing::lib::LogMessage(::icing::lib::LogSeverity::severity, \ + /*verbosity=*/0, "icing", __FILE__, __LINE__) \ + .stream() } // namespace lib } // namespace icing diff --git a/icing/util/logging_raw.cc b/icing/util/logging_raw.cc new file mode 100644 index 0000000..e47a69f --- /dev/null +++ b/icing/util/logging_raw.cc @@ -0,0 +1,96 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/logging_raw.h" + +#include <cstdio> +#include <string> + +// NOTE: this file contains two implementations: one for Android, one for all +// other cases. We always build exactly one implementation. +#if defined(__ANDROID__) + +// Compiled as part of Android. +#include <android/log.h> + +namespace icing { +namespace lib { + +namespace { +// Converts LogSeverity to level for __android_log_write. 
+int GetAndroidLogLevel(LogSeverity::Code severity) { + switch (severity) { + case LogSeverity::VERBOSE: + return ANDROID_LOG_VERBOSE; + case LogSeverity::DBG: + return ANDROID_LOG_DEBUG; + case LogSeverity::INFO: + return ANDROID_LOG_INFO; + case LogSeverity::WARNING: + return ANDROID_LOG_WARN; + case LogSeverity::ERROR: + return ANDROID_LOG_ERROR; + case LogSeverity::FATAL: + return ANDROID_LOG_FATAL; + } +} +} // namespace + +void LowLevelLogging(LogSeverity::Code severity, const std::string& tag, + const std::string& message) { + const int android_log_level = GetAndroidLogLevel(severity); + __android_log_write(android_log_level, tag.c_str(), message.c_str()); +} + +} // namespace lib +} // namespace icing + +#else // if defined(__ANDROID__) + +// Not on Android: implement LowLevelLogging to print to stderr (see below). +namespace icing { +namespace lib { + +namespace { +// Converts LogSeverity to human-readable text. +const char *LogSeverityToString(LogSeverity::Code severity) { + switch (severity) { + case LogSeverity::VERBOSE: + return "VERBOSE"; + case LogSeverity::DBG: + return "DEBUG"; + case LogSeverity::INFO: + return "INFO"; + case LogSeverity::WARNING: + return "WARNING"; + case LogSeverity::ERROR: + return "ERROR"; + case LogSeverity::FATAL: + return "FATAL"; + } +} +} // namespace + +void LowLevelLogging(LogSeverity::Code severity, const std::string &tag, + const std::string &message) { + // TODO(b/146903474) Do not log to stderr for logs other than FATAL and ERROR. 
+ fprintf(stderr, "[%s] %s : %s\n", LogSeverityToString(severity), tag.c_str(), + message.c_str()); + fflush(stderr); +} + +} // namespace lib +} // namespace icing + +#endif // if defined(__ANDROID__) diff --git a/icing/util/logging_raw.h b/icing/util/logging_raw.h new file mode 100644 index 0000000..99dddb6 --- /dev/null +++ b/icing/util/logging_raw.h @@ -0,0 +1,34 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_UTIL_LOGGING_RAW_H_ +#define ICING_UTIL_LOGGING_RAW_H_ + +#include <string> + +#include "icing/proto/debug.pb.h" + +namespace icing { +namespace lib { + +// Low-level logging primitive. Logs a message, with the indicated log +// severity. From android/log.h: "the tag normally corresponds to the component +// that emits the log message, and should be reasonably small". +void LowLevelLogging(LogSeverity::Code severity, const std::string &tag, + const std::string &message); + +} // namespace lib +} // namespace icing + +#endif // ICING_UTIL_LOGGING_RAW_H_ diff --git a/icing/util/logging_test.cc b/icing/util/logging_test.cc new file mode 100644 index 0000000..d912d30 --- /dev/null +++ b/icing/util/logging_test.cc @@ -0,0 +1,143 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/util/logging.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/proto/debug.pb.h" +#include "icing/util/logging_raw.h" + +namespace icing { +namespace lib { + +namespace { + +TEST(LoggingTest, SetLoggingLevelWithInvalidArguments) { + EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 1)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 1)); + + EXPECT_FALSE(SetLoggingLevel(LogSeverity::DBG, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::INFO, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::WARNING, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::ERROR, 2)); + EXPECT_FALSE(SetLoggingLevel(LogSeverity::FATAL, 2)); + + EXPECT_FALSE(SetLoggingLevel(LogSeverity::VERBOSE, -1)); +} + +TEST(LoggingTest, SetLoggingLevelTest) { + // Set to INFO + ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + + // Set to WARNING + ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + + // Set to DEBUG + ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); +} + 
+TEST(LoggingTest, VerboseLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, VerboseLoggingIsControlledByVerbosity) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 2)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 3)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 2)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1)); + + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 2)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 1)); + + ASSERT_TRUE(SetLoggingLevel(LogSeverity::VERBOSE, 0)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::VERBOSE, 0)); + + // Negative verbosity is invalid. 
+ EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, -1)); +} + +TEST(LoggingTest, DebugLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_TRUE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, InfoLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_TRUE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, WarningLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::WARNING)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_TRUE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, ErrorLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::ERROR)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::WARNING)); + EXPECT_TRUE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +TEST(LoggingTest, FatalLoggingTest) { + ASSERT_TRUE(SetLoggingLevel(LogSeverity::FATAL)); + EXPECT_FALSE(ShouldLog(LogSeverity::VERBOSE, 1)); + EXPECT_FALSE(ShouldLog(LogSeverity::DBG)); + EXPECT_FALSE(ShouldLog(LogSeverity::INFO)); + EXPECT_FALSE(ShouldLog(LogSeverity::WARNING)); + EXPECT_FALSE(ShouldLog(LogSeverity::ERROR)); + EXPECT_TRUE(ShouldLog(LogSeverity::FATAL)); +} + +} // namespace +} // namespace lib +} // 
namespace icing diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java index 95e0c84..ea96b67 100644 --- a/java/src/com/google/android/icing/IcingSearchEngine.java +++ b/java/src/com/google/android/icing/IcingSearchEngine.java @@ -16,6 +16,7 @@ package com.google.android.icing; import android.util.Log; import androidx.annotation.NonNull; +import com.google.android.icing.proto.DebugInfoResultProto; import com.google.android.icing.proto.DeleteByNamespaceResultProto; import com.google.android.icing.proto.DeleteByQueryResultProto; import com.google.android.icing.proto.DeleteBySchemaTypeResultProto; @@ -29,6 +30,7 @@ import com.google.android.icing.proto.GetSchemaResultProto; import com.google.android.icing.proto.GetSchemaTypeResultProto; import com.google.android.icing.proto.IcingSearchEngineOptions; import com.google.android.icing.proto.InitializeResultProto; +import com.google.android.icing.proto.LogSeverity; import com.google.android.icing.proto.OptimizeResultProto; import com.google.android.icing.proto.PersistToDiskResultProto; import com.google.android.icing.proto.PersistType; @@ -74,7 +76,9 @@ public class IcingSearchEngine implements Closeable { System.loadLibrary("icing"); } - /** @throws IllegalStateException if IcingSearchEngine fails to be created */ + /** + * @throws IllegalStateException if IcingSearchEngine fails to be created + */ public IcingSearchEngine(@NonNull IcingSearchEngineOptions options) { nativePointer = nativeCreate(options.toByteArray()); if (nativePointer == 0) { @@ -439,9 +443,16 @@ public class IcingSearchEngine implements Closeable { @NonNull public DeleteByQueryResultProto deleteByQuery(@NonNull SearchSpecProto searchSpec) { + return deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ false); + } + + @NonNull + public DeleteByQueryResultProto deleteByQuery( + @NonNull SearchSpecProto searchSpec, boolean returnDeletedDocumentInfo) { throwIfClosed(); - byte[] 
deleteResultBytes = nativeDeleteByQuery(this, searchSpec.toByteArray()); + byte[] deleteResultBytes = + nativeDeleteByQuery(this, searchSpec.toByteArray(), returnDeletedDocumentInfo); if (deleteResultBytes == null) { Log.e(TAG, "Received null DeleteResultProto from native."); return DeleteByQueryResultProto.newBuilder() @@ -539,8 +550,7 @@ public class IcingSearchEngine implements Closeable { } try { - return StorageInfoResultProto.parseFrom( - storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); + return StorageInfoResultProto.parseFrom(storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); } catch (InvalidProtocolBufferException e) { Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e); return StorageInfoResultProto.newBuilder() @@ -550,6 +560,28 @@ public class IcingSearchEngine implements Closeable { } @NonNull + public DebugInfoResultProto getDebugInfo(int verbosity) { + throwIfClosed(); + + byte[] debugInfoResultProtoBytes = nativeGetDebugInfo(this, verbosity); + if (debugInfoResultProtoBytes == null) { + Log.e(TAG, "Received null DebugInfoResultProto from native."); + return DebugInfoResultProto.newBuilder() + .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) + .build(); + } + + try { + return DebugInfoResultProto.parseFrom(debugInfoResultProtoBytes, EXTENSION_REGISTRY_LITE); + } catch (InvalidProtocolBufferException e) { + Log.e(TAG, "Error parsing DebugInfoResultProto.", e); + return DebugInfoResultProto.newBuilder() + .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL)) + .build(); + } + } + + @NonNull public ResetResultProto reset() { throwIfClosed(); @@ -571,6 +603,22 @@ public class IcingSearchEngine implements Closeable { } } + public static boolean shouldLog(LogSeverity.Code severity) { + return shouldLog(severity, (short) 0); + } + + public static boolean shouldLog(LogSeverity.Code severity, short verbosity) { + return nativeShouldLog((short) severity.getNumber(), verbosity); + } + + public static 
boolean setLoggingLevel(LogSeverity.Code priority) { + return setLoggingLevel(priority, (short) 0); + } + + public static boolean setLoggingLevel(LogSeverity.Code priority, short verbosity) { + return nativeSetLoggingLevel((short) priority.getNumber(), verbosity); + } + private static native long nativeCreate(byte[] icingSearchEngineOptionsBytes); private static native void nativeDestroy(IcingSearchEngine instance); @@ -615,7 +663,7 @@ public class IcingSearchEngine implements Closeable { IcingSearchEngine instance, String schemaType); private static native byte[] nativeDeleteByQuery( - IcingSearchEngine instance, byte[] searchSpecBytes); + IcingSearchEngine instance, byte[] searchSpecBytes, boolean returnDeletedDocumentInfo); private static native byte[] nativePersistToDisk(IcingSearchEngine instance, int persistType); @@ -629,4 +677,10 @@ public class IcingSearchEngine implements Closeable { private static native byte[] nativeSearchSuggestions( IcingSearchEngine instance, byte[] suggestionSpecBytes); + + private static native byte[] nativeGetDebugInfo(IcingSearchEngine instance, int verbosity); + + private static native boolean nativeShouldLog(short severity, short verbosity); + + private static native boolean nativeSetLoggingLevel(short priority, short verbosity); } diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java index a46814c..a6d160c 100644 --- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java +++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java @@ -17,6 +17,7 @@ package com.google.android.icing; import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; +import com.google.android.icing.proto.DebugInfoResultProto; import com.google.android.icing.proto.DeleteByNamespaceResultProto; import 
com.google.android.icing.proto.DeleteByQueryResultProto; import com.google.android.icing.proto.DeleteBySchemaTypeResultProto; @@ -30,6 +31,7 @@ import com.google.android.icing.proto.GetSchemaResultProto; import com.google.android.icing.proto.GetSchemaTypeResultProto; import com.google.android.icing.proto.IcingSearchEngineOptions; import com.google.android.icing.proto.InitializeResultProto; +import com.google.android.icing.proto.LogSeverity; import com.google.android.icing.proto.OptimizeResultProto; import com.google.android.icing.proto.PersistToDiskResultProto; import com.google.android.icing.proto.PersistType; @@ -389,6 +391,60 @@ public final class IcingSearchEngineTest { DeleteByQueryResultProto deleteResultProto = icingSearchEngine.deleteByQuery(searchSpec); assertStatusOk(deleteResultProto.getStatus()); + // By default, the deleteByQuery API does not return the summary about deleted documents, unless + // the returnDeletedDocumentInfo parameter is set to true. + assertThat(deleteResultProto.getDeletedDocumentsList()).isEmpty(); + + GetResultProto getResultProto = + icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance()); + assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND); + getResultProto = + icingSearchEngine.get("namespace", "uri2", GetResultSpecProto.getDefaultInstance()); + assertStatusOk(getResultProto.getStatus()); + } + + @Test + public void testDeleteByQueryWithDeletedDocumentInfo() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig(); + SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build(); + assertThat( + icingSearchEngine + .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false) + .getStatus() + .getCode()) + .isEqualTo(StatusProto.Code.OK); + + DocumentProto emailDocument1 = + createEmailDocument("namespace", "uri1").toBuilder() + 
.addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo")) + .build(); + + assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus()); + DocumentProto emailDocument2 = + createEmailDocument("namespace", "uri2").toBuilder() + .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar")) + .build(); + + assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus()); + + SearchSpecProto searchSpec = + SearchSpecProto.newBuilder() + .setQuery("foo") + .setTermMatchType(TermMatchType.Code.PREFIX) + .build(); + + DeleteByQueryResultProto deleteResultProto = + icingSearchEngine.deleteByQuery(searchSpec, /*returnDeletedDocumentInfo=*/ true); + assertStatusOk(deleteResultProto.getStatus()); + DeleteByQueryResultProto.DocumentGroupInfo info = + DeleteByQueryResultProto.DocumentGroupInfo.newBuilder() + .setNamespace("namespace") + .setSchema("Email") + .addUris("uri1") + .build(); + assertThat(deleteResultProto.getDeletedDocumentsList()).containsExactly(info); GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance()); @@ -434,6 +490,14 @@ public final class IcingSearchEngineTest { } @Test + public void testGetDebugInfo() throws Exception { + assertStatusOk(icingSearchEngine.initialize().getStatus()); + + DebugInfoResultProto debugInfoResultProto = icingSearchEngine.getDebugInfo(/*verbosity= */ 1); + assertStatusOk(debugInfoResultProto.getStatus()); + } + + @Test public void testGetAllNamespaces() throws Exception { assertStatusOk(icingSearchEngine.initialize().getStatus()); @@ -668,6 +732,29 @@ public final class IcingSearchEngineTest { assertThat(response.getSuggestions(1).getQuery()).isEqualTo("fo"); } + @Test + public void testLogging() throws Exception { + // Set to INFO + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.INFO)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isTrue(); + 
assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isFalse(); + + // Set to WARNING + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.WARNING)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.WARNING)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.INFO)).isFalse(); + + // Set to DEBUG + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.DBG)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.DBG)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE)).isFalse(); + + // Set to VERBOSE + assertThat(IcingSearchEngine.setLoggingLevel(LogSeverity.Code.VERBOSE, (short) 1)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 1)).isTrue(); + assertThat(IcingSearchEngine.shouldLog(LogSeverity.Code.VERBOSE, (short) 2)).isFalse(); + } + private static void assertStatusOk(StatusProto status) { assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK); } diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto index 504ae43..061e123 100644 --- a/proto/icing/proto/debug.proto +++ b/proto/icing/proto/debug.proto @@ -24,6 +24,98 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; +message LogSeverity { + enum Code { + VERBOSE = 0; + // Unable to use DEBUG at this time because it breaks YTM's iOS tests + // cs/?q=%22-DDEBUG%3D1%22%20f:%2FYoutubeMusic%20f:blueprint&ssfr=1 + DBG = 1; + INFO = 2; + WARNING = 3; + ERROR = 4; + FATAL = 5; + } +} + +// Next tag: 6 +message LexiconDebugInfoProto { + optional uint32 num_keys = 1; + + // Next tag: 12 + message NodeInfo { + optional uint32 num_nodes = 1; + optional uint32 max_nodes = 2; + + // Count of intermediate nodes. + optional uint32 num_intermediates = 3; + // Total, maximum and average number of children of intermediate nodes. 
+ optional uint32 sum_children = 4; + optional uint32 max_children = 5; + optional float avg_children = 6; // sum_children / num_intermediates + + // Count of leaf nodes. + optional uint32 num_leaves = 7; + // Total, maximum and average depth of leaf nodes. + optional uint32 sum_depth = 8; + optional uint32 max_depth = 9; + optional float avg_depth = 10; // sum_depth / num_leaves + + optional uint32 dirty_pages = 11; + } + optional NodeInfo node_info = 2; + + // Next tag: 10 + message NextInfo { + optional uint32 num_nexts = 1; + optional uint32 max_nexts = 2; + // Count of next arrays by size. Need verbosity > 0. + // Index into child_counts is the number of children minus 1 and the value + // is the number of intermediate nodes that have that number of children. + repeated uint32 child_counts = 3 + [packed = true]; // Array of size kMaxNextArraySize + // Wasted next array space per allocation bucket (in Nexts, not bytes). + // Index into wasted is log2(bucket_size). + // Ex. wasted[3] is the number of nexts wasted from all next arrays of size + // 2^3=8. + repeated uint32 wasted = 4 + [packed = true]; // Array of size kNumNextAllocationBuckets + // Sum of wasted array. + optional uint32 total_wasted = 5; + // Next free-list stats + // Index into num_free is log2(bucket_size). + // Ex. num_free[3] is the number of free lists of size 2^3=8. + repeated uint32 num_free = 6 + [packed = true]; // Array of size kNumNextAllocationBuckets + // Total Next nodes free (weighted sum of the above). + optional uint32 total_free = 7; + // A measure of the fragmentation of next arrays. + optional float total_frag = 8; // (total_free + total_wasted) / num_nexts + optional uint32 dirty_pages = 9; + } + optional NextInfo next_info = 3; + + // Next tag: 6 + message SuffixInfo { + optional uint32 suffixes_capacity = 1; + optional uint32 max_suffixes_capacity = 2; + // Bytes actually used by suffixes. + optional uint32 suffixes_used = 3; + // Number of suffixes that are just empty strings. 
+ optional uint32 num_null_suffixes = 4; + optional uint32 dirty_pages = 5; + } + optional SuffixInfo suffix_info = 4; + + // Next tag: 3 + message PropertyBitmapInfo { + // Property id. -1 represents deleted bitmap. + optional int32 property_id = 1; + optional uint32 file_size = 2; + } + // For verbosity > 0 + repeated PropertyBitmapInfo property_bitmaps_info = 5; +} + // Next tag: 4 message IndexDebugInfoProto { // Storage information of the index. @@ -31,8 +123,7 @@ message IndexDebugInfoProto { message MainIndexDebugInfoProto { // Information about the main lexicon. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string lexicon_info = 1; + optional LexiconDebugInfoProto lexicon_info = 1; // Last added document id. optional uint32 last_added_document_id = 2; @@ -62,8 +153,7 @@ message IndexDebugInfoProto { optional uint32 index_crc = 5; // Information about the lite lexicon. - // TODO(b/222349894) Convert the string output to a protocol buffer instead. - optional string lexicon_info = 6; + optional LexiconDebugInfoProto lexicon_info = 6; } optional LiteIndexDebugInfoProto lite_index_info = 3; } @@ -117,7 +207,8 @@ message DebugInfoProto { message DebugInfoResultProto { // Status code can be one of: // OK - // FAILED_PRECONDITION + // FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet + // INTERNAL on IO errors, crc compute error. // // See status.proto for more details. optional StatusProto status = 1; diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 73d349b..0f0041e 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=436284873) +set(synced_AOSP_CL_number=443464881) |