aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCassie Wang <cassiewang@google.com>2021-07-07 15:42:33 -0700
committerCassie Wang <cassiewang@google.com>2021-07-08 12:02:37 -0700
commita452e10c9eb9b6d79508e9e81c82739d5e2b6543 (patch)
tree5498f3656d62a2eece26be662ed0e2b8eb7eca0b
parent8ed9ccef5cd49d935a322118c8697da411f4ae51 (diff)
downloadicing-a452e10c9eb9b6d79508e9e81c82739d5e2b6543.tar.gz
Enforce the kMaxDocumentId limit.
If we exceed ~1 million documents, Icing lib will properly throw an OUT_OF_SPACE error now. Previously, undefined behavior would have occurred as our DocumentId could not properly store values greater than 2^20. Bug: 192464981 Test: framework presubmit Test: manually copied change to google3 and ran all icing lib c++ tests Test: manually copied change to google3 and verified BM_PutMaxAllowedDocuments succeeded Change-Id: I1c4871c066bfcf2a4c1e8c51a9115fe64dbf5c88
-rw-r--r--icing/icing-search-engine.h1
-rw-r--r--icing/icing-search-engine_benchmark.cc54
-rw-r--r--icing/store/document-store.cc10
-rw-r--r--icing/store/document-store.h1
4 files changed, 65 insertions, 1 deletions
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 3dc7e29..855401f 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -178,6 +178,7 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // OUT_OF_SPACE if exceeds maximum number of allowed documents
// FAILED_PRECONDITION if a schema has not been set yet, IcingSearchEngine
// has not been initialized yet.
// NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index 316b74f..ba9aed1 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -70,6 +70,7 @@ namespace lib {
namespace {
using ::testing::Eq;
+using ::testing::HasSubstr;
// Icing GMSCore has, on average, 17 corpora on a device and 30 corpora at the
// 95th pct. Most clients use a single type. This is a function of Icing's
@@ -691,6 +692,59 @@ void BM_Delete(benchmark::State& state) {
}
BENCHMARK(BM_Delete);
+void BM_PutMaxAllowedDocuments(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document that has the term "foo"
+ DocumentProto base_document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .AddStringProperty("body", "foo")
+ .Build();
+
+ // Insert a lot of documents with the term "foo"
+ for (auto s : state) {
+ for (int64_t i = 0; i <= kMaxDocumentId; ++i) {
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
+ EXPECT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+ }
+
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri("out_of_space_uri").Build();
+ PutResultProto put_result_proto = icing->Put(document);
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
+ EXPECT_THAT(put_result_proto.status().message(),
+ HasSubstr("Exceeded maximum number of documents"));
+}
+BENCHMARK(BM_PutMaxAllowedDocuments);
+
} // namespace
} // namespace lib
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 907bace..81edce1 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -73,7 +73,9 @@ constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
constexpr char kUsageStoreDirectoryName[] = "usage_store";
constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
-constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
+// Determined through manual testing to allow for 1 million uris. 1 million
+// because we allow up to 1 million DocumentIds.
+constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB
// 384 KiB for a KeyMapper would allow each internal array to have a max of
// 128 KiB for storage.
@@ -805,6 +807,12 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
// Creates a new document id, updates key mapper and document_id mapper
DocumentId new_document_id = document_id_mapper_->num_elements();
+ if (!IsDocumentIdValid(new_document_id)) {
+ return absl_ports::ResourceExhaustedError(
+ "Exceeded maximum number of documents. Try calling Optimize to reclaim "
+ "some space.");
+ }
+
ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
MakeFingerprint(name_space, uri), new_document_id));
ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 79d99d4..a60aab1 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -156,6 +156,7 @@ class DocumentStore {
//
// Returns:
// A newly generated document id on success
+ // RESOURCE_EXHAUSED if exceeds maximum number of allowed documents
// FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if the schema_type or a property config of the document doesn't
// exist in schema