diff options
| Field | Value | Date |
|---|---|---|
| author | Tim Barron <tjbarron@google.com> | 2022-03-21 15:11:34 -0700 |
| committer | Tim Barron <tjbarron@google.com> | 2022-03-21 15:11:34 -0700 |
| commit | c5fa7ff3ae4043c396e81087c0570fd16322341d (patch) | |
| tree | e9277f0999715b0f145d6c5bc258ef69a980eb08 | |
| parent | d3c3b447a8243f47a155ea4a97ae9d95f7e7f210 (diff) | |
| download | icing-c5fa7ff3ae4043c396e81087c0570fd16322341d.tar.gz | |

Sync from upstream.
Descriptions:
======================================================================
Add some additional logging that will help diagnose b/218413237
======================================================================
Mark VerbatimTokenizer::ResetToTokenStartingAfter as 'override'.
======================================================================
Support dump function for SchemaStore
======================================================================
Bug: 218413237
Change-Id: I8554dc14b9e9b5aec56e4e9a3b3aa07c0e7d0f1b
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | icing/schema/schema-store.cc | 12 |
| -rw-r--r-- | icing/schema/schema-store.h | 12 |
| -rw-r--r-- | icing/schema/schema-store_test.cc | 33 |
| -rw-r--r-- | icing/store/document-store.cc | 12 |
| -rw-r--r-- | icing/tokenization/verbatim-tokenizer.cc | 2 |
| -rw-r--r-- | proto/icing/proto/debug.proto | 15 |
| -rw-r--r-- | synced_AOSP_CL_number.txt | 2 |

7 files changed, 83 insertions, 5 deletions
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index b5c976f..acc5030 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -493,5 +493,17 @@ SchemaStore::GetSectionMetadata(const std::string& schema_type) const { return section_manager_->GetMetadataList(schema_type); } +libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo() + const { + SchemaDebugInfoProto debug_info; + if (has_schema_successfully_set_) { + ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema()); + *debug_info.mutable_schema() = *schema; + } + ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); + debug_info.set_crc(crc.Get()); + return debug_info; +} + } // namespace lib } // namespace icing diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index 6b6528d..2d3aca7 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -26,6 +26,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" @@ -137,9 +138,7 @@ class SchemaStore { // Persists and updates checksum of subcomponents. ~SchemaStore(); - // Retrieve the current schema if it exists. Caller does not get ownership of - // the schema proto and modifying the returned pointer does not affect the - // underlying schema proto. + // Retrieve the current schema if it exists. // // Returns: // SchemaProto* if exists @@ -258,6 +257,13 @@ class SchemaStore { // that field will be set to -1. SchemaStoreStorageInfoProto GetStorageInfo() const; + // Get debug information for the schema store. 
+ // + // Returns: + // SchemaDebugInfoProto on success + // INTERNAL_ERROR on IO errors, crc compute error + libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const; + private: // Use SchemaStore::Create instead. explicit SchemaStore(const Filesystem* filesystem, std::string base_dir, diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 974af63..113084e 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -44,6 +44,7 @@ using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::ElementsAre; using ::testing::Eq; using ::testing::Ge; +using ::testing::Gt; using ::testing::Not; using ::testing::Pointee; @@ -868,6 +869,38 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1)); } +TEST_F(SchemaStoreTest, GetDebugInfo) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // Set schema + ASSERT_THAT( + schema_store->SetSchema(schema_), + IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{ + .success = true, + .schema_types_new_by_name = {schema_.types(0).schema_type()}}))); + + // Check debug info + ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out, + schema_store->GetDebugInfo()); + EXPECT_THAT(out.schema(), EqualsProto(schema_)); + EXPECT_THAT(out.crc(), Gt(0)); +} + +TEST_F(SchemaStoreTest, GetDebugInfoForEmptySchemaStore) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // Check debug info before setting a schema + ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out, + schema_store->GetDebugInfo()); + SchemaDebugInfoProto expected_out; + expected_out.set_crc(0); + EXPECT_THAT(out, EqualsProto(expected_out)); +} + } // namespace } // namespace lib diff --git a/icing/store/document-store.cc 
b/icing/store/document-store.cc index a2ae5f9..8c8369c 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -431,7 +431,19 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles( // Iterates through document log auto iterator = document_log_->GetIterator(); auto iterator_status = iterator.Advance(); + libtextclassifier3::StatusOr<int64_t> element_size = + document_log_->GetElementsFileSize(); + libtextclassifier3::StatusOr<int64_t> disk_usage = + document_log_->GetDiskUsage(); + if (element_size.ok() && disk_usage.ok()) { + ICING_VLOG(1) << "Starting recovery of document store. Document store " + "elements file size:" + << element_size.ValueOrDie() + << ", disk usage=" << disk_usage.ValueOrDie(); + } while (iterator_status.ok()) { + ICING_VLOG(2) << "Attempting to read document at offset=" + << iterator.GetOffset(); libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or = document_log_->ReadProto(iterator.GetOffset()); diff --git a/icing/tokenization/verbatim-tokenizer.cc b/icing/tokenization/verbatim-tokenizer.cc index dc54696..0d3a320 100644 --- a/icing/tokenization/verbatim-tokenizer.cc +++ b/icing/tokenization/verbatim-tokenizer.cc @@ -72,7 +72,7 @@ class VerbatimTokenIterator : public Tokenizer::Iterator { } } - bool ResetToTokenStartingAfter(int32_t utf32_offset) { + bool ResetToTokenStartingAfter(int32_t utf32_offset) override { // We can only reset to the sole verbatim token, so we must have a negative // offset for it to be considered the token after. 
if (utf32_offset < 0) { diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto index 3f07539..504ae43 100644 --- a/proto/icing/proto/debug.proto +++ b/proto/icing/proto/debug.proto @@ -16,6 +16,7 @@ syntax = "proto2"; package icing.lib; +import "icing/proto/schema.proto"; import "icing/proto/status.proto"; import "icing/proto/storage.proto"; @@ -90,12 +91,26 @@ message DocumentDebugInfoProto { } // Next tag: 3 +message SchemaDebugInfoProto { + // Copy of the SchemaProto if it has been set in the schema store. + // Modifying this does not affect the Schema that IcingSearchEngine holds. + optional SchemaProto schema = 1; + + // The most recent checksum of the schema store, by calling + // SchemaStore::ComputeChecksum(). + optional uint32 crc = 2; +} + +// Next tag: 4 message DebugInfoProto { // Debug information of the index. optional IndexDebugInfoProto index_info = 1; // Debug information of the document store. optional DocumentDebugInfoProto document_info = 2; + + // Debug information of the schema store. + optional SchemaDebugInfoProto schema_info = 3; } // Next tag: 3 diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 83c7099..73d349b 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=435140515) +set(synced_AOSP_CL_number=436284873) |