diff options
author | Cassie Wang <cassiewang@google.com> | 2021-07-07 15:42:33 -0700 |
---|---|---|
committer | Cassie Wang <cassiewang@google.com> | 2021-07-08 12:02:37 -0700 |
commit | a452e10c9eb9b6d79508e9e81c82739d5e2b6543 (patch) | |
tree | 5498f3656d62a2eece26be662ed0e2b8eb7eca0b | |
parent | 8ed9ccef5cd49d935a322118c8697da411f4ae51 (diff) | |
download | icing-a452e10c9eb9b6d79508e9e81c82739d5e2b6543.tar.gz |
Enforce the kMaxDocumentId limit.
If we exceed ~1 million documents, Icing lib will properly throw an
OUT_OF_SPACE error now. Previously, undefined behavior would have
occurred as our DocumentId could not properly store values greater than
2^20.
Bug: 192464981
Test: framework presubmit
Test: manually copied change to google3 and ran all icing lib c++ tests
Test: manually copied change to google3 and verified
BM_PutMaxAllowedDocuments succeeded
Change-Id: I1c4871c066bfcf2a4c1e8c51a9115fe64dbf5c88
-rw-r--r-- | icing/icing-search-engine.h | 1 | ||||
-rw-r--r-- | icing/icing-search-engine_benchmark.cc | 54 | ||||
-rw-r--r-- | icing/store/document-store.cc | 10 | ||||
-rw-r--r-- | icing/store/document-store.h | 1 |
4 files changed, 65 insertions, 1 deletions
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index 3dc7e29..855401f 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -178,6 +178,7 @@ class IcingSearchEngine { // // Returns: // OK on success + // OUT_OF_SPACE if exceeds maximum number of allowed documents // FAILED_PRECONDITION if a schema has not been set yet, IcingSearchEngine // has not been initialized yet. // NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 316b74f..ba9aed1 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -70,6 +70,7 @@ namespace lib { namespace { using ::testing::Eq; +using ::testing::HasSubstr; // Icing GMSCore has, on average, 17 corpora on a device and 30 corpora at the // 95th pct. Most clients use a single type. This is a function of Icing's @@ -691,6 +692,59 @@ void BM_Delete(benchmark::State& state) { } BENCHMARK(BM_Delete); +void BM_PutMaxAllowedDocuments(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL))) + .Build(); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + // Create a document that has the term "foo" + DocumentProto base_document = DocumentBuilder() + .SetSchema("Message") + .SetNamespace("namespace") + .AddStringProperty("body", "foo") + .Build(); + + // Insert a lot of documents with the term "foo" + for (auto s : state) { + for (int64_t i = 0; i <= kMaxDocumentId; ++i) { + DocumentProto document = + DocumentBuilder(base_document).SetUri(std::to_string(i)).Build(); + EXPECT_THAT(icing->Put(document).status(), ProtoIsOk()); + } + } + + DocumentProto document = + DocumentBuilder(base_document).SetUri("out_of_space_uri").Build(); + PutResultProto put_result_proto = icing->Put(document); + EXPECT_THAT(put_result_proto.status(), + ProtoStatusIs(StatusProto::OUT_OF_SPACE)); + EXPECT_THAT(put_result_proto.status().message(), + HasSubstr("Exceeded maximum number of documents")); +} +BENCHMARK(BM_PutMaxAllowedDocuments); + } // namespace } // namespace lib diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 907bace..81edce1 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -73,7 +73,9 @@ constexpr char kNamespaceMapperFilename[] = "namespace_mapper"; constexpr char kUsageStoreDirectoryName[] = "usage_store"; constexpr char kCorpusIdMapperFilename[] = "corpus_mapper"; -constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB +// Determined through manual testing to allow for 1 million uris. 1 million +// because we allow up to 1 million DocumentIds. +constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB // 384 KiB for a KeyMapper would allow each internal array to have a max of // 128 KiB for storage. @@ -805,6 +807,12 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut( // Creates a new document id, updates key mapper and document_id mapper DocumentId new_document_id = document_id_mapper_->num_elements(); + if (!IsDocumentIdValid(new_document_id)) { + return absl_ports::ResourceExhaustedError( + "Exceeded maximum number of documents. Try calling Optimize to reclaim " + "some space."); + } + ICING_RETURN_IF_ERROR(document_key_mapper_->Put( MakeFingerprint(name_space, uri), new_document_id)); ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset)); diff --git a/icing/store/document-store.h b/icing/store/document-store.h index 79d99d4..a60aab1 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -156,6 +156,7 @@ class DocumentStore { // // Returns: // A newly generated document id on success + // RESOURCE_EXHAUSED if exceeds maximum number of allowed documents // FAILED_PRECONDITION if schema hasn't been set yet // NOT_FOUND if the schema_type or a property config of the document doesn't // exist in schema |