diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-10-10 16:08:32 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-10-10 16:08:32 +0000 |
commit | f002cea816856d6f95111f4eb65ed42e1f1e2526 (patch) | |
tree | f6c3e8206a8372e464c6d2576ca90821f2df300b | |
parent | 6d8a137a71163e8594c2cd3ba5f0d269571c3a24 (diff) | |
parent | fb3ae39e2f6ece0a75b3670b7256587d18bc81ff (diff) | |
download | icing-f002cea816856d6f95111f4eb65ed42e1f1e2526.tar.gz |
Snap for 9157479 from fb3ae39e2f6ece0a75b3670b7256587d18bc81ff to mainline-tzdata4-release
Change-Id: Iee00c6f3084d1b09dd033ec433dba43ed7b5ebd6
-rw-r--r-- | icing/icing-search-engine.cc | 3 | ||||
-rw-r--r-- | icing/icing-search-engine_test.cc | 48 | ||||
-rw-r--r-- | icing/index/index.cc | 6 | ||||
-rw-r--r-- | icing/index/index.h | 5 | ||||
-rw-r--r-- | icing/index/index_test.cc | 48 | ||||
-rw-r--r-- | icing/index/lite/lite-index.cc | 10 | ||||
-rw-r--r-- | icing/index/lite/lite-index.h | 5 | ||||
-rw-r--r-- | icing/index/main/main-index.h | 3 |
8 files changed, 99 insertions, 29 deletions
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 4089ec9..4be4ac3 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -1259,7 +1259,8 @@ OptimizeResultProto IcingSearchEngine::Optimize() { optimize_stats->set_index_restoration_mode( OptimizeStatsProto::INDEX_TRANSLATION); libtextclassifier3::Status index_optimize_status = - index_->Optimize(document_id_old_to_new_or.ValueOrDie()); + index_->Optimize(document_id_old_to_new_or.ValueOrDie(), + document_store_->last_added_document_id()); if (!index_optimize_status.ok()) { ICING_LOG(WARNING) << "Failed to optimize index. Error: " << index_optimize_status.error_message(); diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 2ac456e..f862e45 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -3003,6 +3003,54 @@ TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) { EXPECT_THAT(icing.Put(document5).status(), ProtoIsOk()); } +TEST_F(IcingSearchEngineTest, + GetAndPutShouldWorkAfterOptimizationWithEmptyDocuments) { + DocumentProto empty_document1 = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto empty_document2 = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto empty_document3 = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetSchema("Message") + .AddStringProperty("body", "") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + ASSERT_THAT(icing.Put(empty_document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(empty_document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk()); + ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); + + // Validates that Get() and Put() are good right after Optimize() + *expected_get_result_proto.mutable_document() = empty_document1; + EXPECT_THAT( + icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + EXPECT_THAT( + icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + EXPECT_THAT(icing.Put(empty_document3).status(), ProtoIsOk()); +} + TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) { DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); diff --git a/icing/index/index.cc b/icing/index/index.cc index 1d863cc..6004ed3 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -265,11 +265,13 @@ IndexStorageInfoProto Index::GetStorageInfo() const { } libtextclassifier3::Status Index::Optimize( - const std::vector<DocumentId>& document_id_old_to_new) { + const std::vector<DocumentId>& document_id_old_to_new, + DocumentId new_last_added_document_id) { if (main_index_->last_added_document_id() != kInvalidDocumentId) { ICING_RETURN_IF_ERROR(main_index_->Optimize(document_id_old_to_new)); } - return lite_index_->Optimize(document_id_old_to_new, term_id_codec_.get()); + return lite_index_->Optimize(document_id_old_to_new, term_id_codec_.get(), + new_last_added_document_id); } libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) { diff --git a/icing/index/index.h b/icing/index/index.h index 748acb0..55f2358 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -264,13 +264,16 @@ class Index { } // Reduces internal file sizes by reclaiming space of deleted documents. + // new_last_added_document_id will be used to update the last added document + // id in the lite index. // // Returns: // OK on success // INTERNAL_ERROR on IO error, this indicates that the index may be in an // invalid state and should be cleared. libtextclassifier3::Status Optimize( - const std::vector<DocumentId>& document_id_old_to_new); + const std::vector<DocumentId>& document_id_old_to_new, + DocumentId new_last_added_document_id); private: Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec, diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 7323603..23945de 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -267,7 +267,8 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); index_->set_last_added_document_id(kDocumentId2); - ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2})); + ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2}, + /*new_last_added_document_id=*/2)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre(EqualsDocHitInfo( kDocumentId2, std::vector<SectionId>{kSectionId2})))); @@ -275,7 +276,8 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) { // Mapping to a different docid will translate the hit ICING_ASSERT_OK(index_->Optimize( - /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1})); + /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1}, + /*new_last_added_document_id=*/1)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre(EqualsDocHitInfo( kDocumentId1, std::vector<SectionId>{kSectionId2})))); @@ -283,10 +285,11 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterOptimize) { // Mapping to kInvalidDocumentId will remove the hit. ICING_ASSERT_OK( - index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId})); + index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId}, + /*new_last_added_document_id=*/0)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(IsEmpty())); - EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId); + EXPECT_EQ(index_->last_added_document_id(), kDocumentId0); } TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) { @@ -298,7 +301,8 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) { ICING_ASSERT_OK(index_->Merge()); - ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2})); + ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2}, + /*new_last_added_document_id=*/2)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre(EqualsDocHitInfo( kDocumentId2, std::vector<SectionId>{kSectionId2})))); @@ -306,7 +310,8 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) { // Mapping to a different docid will translate the hit ICING_ASSERT_OK(index_->Optimize( - /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1})); + /*document_id_old_to_new=*/{0, kInvalidDocumentId, kDocumentId1}, + /*new_last_added_document_id=*/1)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre(EqualsDocHitInfo( kDocumentId1, std::vector<SectionId>{kSectionId2})))); @@ -314,10 +319,11 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMergeAndOptimize) { // Mapping to kInvalidDocumentId will remove the hit. ICING_ASSERT_OK( - index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId})); + index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId}, + /*new_last_added_document_id=*/0)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(IsEmpty())); - EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId); + EXPECT_EQ(index_->last_added_document_id(), 0); } TEST_F(IndexTest, SingleHitMultiTermIndex) { @@ -369,7 +375,8 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); index_->set_last_added_document_id(kDocumentId2); - ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2})); + ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2}, + /*new_last_added_document_id=*/2)); EXPECT_THAT( GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre( @@ -383,7 +390,8 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) { // Delete document id 1, and document id 2 is translated to 1. ICING_ASSERT_OK( - index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1})); + index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1}, + /*new_last_added_document_id=*/1)); EXPECT_THAT( GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre( @@ -396,7 +404,8 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterOptimize) { // Delete all the rest documents. ICING_ASSERT_OK(index_->Optimize( - /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId})); + /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId}, + /*new_last_added_document_id=*/kInvalidDocumentId)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(IsEmpty())); EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY), @@ -423,7 +432,8 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) { ICING_ASSERT_OK(index_->Merge()); - ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2})); + ICING_ASSERT_OK(index_->Optimize(/*document_id_old_to_new=*/{0, 1, 2}, + /*new_last_added_document_id=*/2)); EXPECT_THAT( GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre( @@ -437,7 +447,8 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) { // Delete document id 1, and document id 2 is translated to 1. ICING_ASSERT_OK( - index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1})); + index_->Optimize(/*document_id_old_to_new=*/{0, kInvalidDocumentId, 1}, + /*new_last_added_document_id=*/1)); EXPECT_THAT( GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(ElementsAre( @@ -450,7 +461,8 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMergeAndOptimize) { // Delete all the rest documents. ICING_ASSERT_OK(index_->Optimize( - /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId})); + /*document_id_old_to_new=*/{kInvalidDocumentId, kInvalidDocumentId}, + /*new_last_added_document_id=*/kInvalidDocumentId)); EXPECT_THAT(GetHits("foo", TermMatchType::EXACT_ONLY), IsOkAndHolds(IsEmpty())); EXPECT_THAT(GetHits("bar", TermMatchType::EXACT_ONLY), @@ -986,7 +998,10 @@ TEST_F(IndexTest, FullIndexMerge) { TEST_F(IndexTest, OptimizeShouldWorkForEmptyIndex) { // Optimize an empty index should succeed, but have no effects. - ICING_ASSERT_OK(index_->Optimize(std::vector<DocumentId>())); + ICING_ASSERT_OK( + index_->Optimize(std::vector<DocumentId>(), + /*new_last_added_document_id=*/kInvalidDocumentId)); + EXPECT_EQ(index_->last_added_document_id(), kInvalidDocumentId); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> itr, @@ -1053,7 +1068,8 @@ TEST_F(IndexTest, IndexOptimize) { std::reverse(exp_prefix_hits.begin(), exp_prefix_hits.end()); // Check that optimize is correct - ICING_ASSERT_OK(index_->Optimize(document_id_old_to_new)); + ICING_ASSERT_OK( + index_->Optimize(document_id_old_to_new, new_last_added_document_id)); EXPECT_EQ(index_->last_added_document_id(), new_last_added_document_id); // Check prefix search. ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocHitInfo> hits, diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index 9622ff4..3e614d2 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -484,7 +484,8 @@ uint32_t LiteIndex::Seek(uint32_t term_id) { libtextclassifier3::Status LiteIndex::Optimize( const std::vector<DocumentId>& document_id_old_to_new, - const TermIdCodec* term_id_codec) { + const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) { + header_->set_last_added_docid(new_last_added_document_id); if (header_->cur_size() == 0) { return libtextclassifier3::Status::OK; } @@ -492,8 +493,6 @@ libtextclassifier3::Status LiteIndex::Optimize( // which helps later to determine which terms will be unused after compaction. SortHits(); uint32_t new_size = 0; - // The largest document id after translating hits. - DocumentId largest_document_id = kInvalidDocumentId; uint32_t curr_term_id = 0; uint32_t curr_tvi = 0; std::unordered_set<uint32_t> tvi_to_delete; @@ -518,10 +517,6 @@ libtextclassifier3::Status LiteIndex::Optimize( if (new_document_id == kInvalidDocumentId) { continue; } - if (largest_document_id == kInvalidDocumentId || - new_document_id > largest_document_id) { - largest_document_id = new_document_id; - } if (term_id_hit_pair.hit().is_in_prefix_section()) { lexicon_.SetProperty(curr_tvi, GetHasHitsInPrefixSectionPropertyId()); } @@ -539,7 +534,6 @@ libtextclassifier3::Status LiteIndex::Optimize( } header_->set_cur_size(new_size); header_->set_searchable_end(new_size); - header_->set_last_added_docid(largest_document_id); // Delete unused terms. std::unordered_set<std::string> terms_to_delete; diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 64b5881..be629b8 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -263,13 +263,16 @@ class LiteIndex { // Reduces internal file sizes by reclaiming space of deleted documents. // + // This method also sets the last_added_docid of the index to + // new_last_added_document_id. + // // Returns: // OK on success // INTERNAL_ERROR on IO error, this indicates that the index may be in an // invalid state and should be cleared. libtextclassifier3::Status Optimize( const std::vector<DocumentId>& document_id_old_to_new, - const TermIdCodec* term_id_codec); + const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id); private: static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions(); diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h index 15030b0..4ed2e94 100644 --- a/icing/index/main/main-index.h +++ b/icing/index/main/main-index.h @@ -190,6 +190,9 @@ class MainIndex { // Reduces internal file sizes by reclaiming space of deleted documents. // + // This method will update the last_added_docid of the index to the largest + // document id that still appears in the index after compaction. + // // Returns: // OK on success // INTERNAL_ERROR on IO error, this indicates that the index may be in an |