aboutsummaryrefslogtreecommitdiff
path: root/icing/index
diff options
context:
space:
mode:
Diffstat (limited to 'icing/index')
-rw-r--r--icing/index/data-indexing-handler.h (renamed from icing/index/section-indexing-handler.h)25
-rw-r--r--icing/index/index-processor.cc29
-rw-r--r--icing/index/index-processor.h43
-rw-r--r--icing/index/index-processor_benchmark.cc73
-rw-r--r--icing/index/index-processor_test.cc94
-rw-r--r--icing/index/integer-section-indexing-handler.cc16
-rw-r--r--icing/index/integer-section-indexing-handler.h27
-rw-r--r--icing/index/numeric/integer-index.h2
-rw-r--r--icing/index/string-section-indexing-handler.cc14
-rw-r--r--icing/index/string-section-indexing-handler.h32
10 files changed, 218 insertions, 137 deletions
diff --git a/icing/index/section-indexing-handler.h b/icing/index/data-indexing-handler.h
index 98efc8f..0061b79 100644
--- a/icing/index/section-indexing-handler.h
+++ b/icing/index/data-indexing-handler.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_SECTION_INDEXING_HANDLER_H_
-#define ICING_INDEX_SECTION_INDEXING_HANDLER_H_
+#ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_
+#define ICING_INDEX_DATA_INDEXING_HANDLER_H_
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/proto/logging.pb.h"
@@ -24,24 +24,23 @@
namespace icing {
namespace lib {
-// Parent class for indexing different types of sections in TokenizedDocument.
-class SectionIndexingHandler {
+// Parent class for indexing different types of data in TokenizedDocument.
+class DataIndexingHandler {
public:
- explicit SectionIndexingHandler(const Clock* clock) : clock_(*clock) {}
+ explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {}
- virtual ~SectionIndexingHandler() = default;
+ virtual ~DataIndexingHandler() = default;
- // Handles the indexing process: add data (hits) into the specific type index
- // (e.g. term index, integer index) for all contents in the corresponding type
- // of sections in tokenized_document.
+ // Handles the indexing process: add data into the specific type index (e.g.
+ // term index, integer index, qualified id type joinable index) for all
+ // contents in the corresponding type of data in tokenized_document.
// For example, IntegerSectionIndexingHandler::Handle should add data into
// integer index for all contents in tokenized_document.integer_sections.
//
// Also it should handle last added DocumentId properly (based on
// recovery_mode_) to avoid adding previously indexed documents.
//
- // tokenized_document: document object with different types of tokenized
- // sections.
+ // tokenized_document: document object with different types of tokenized data.
// document_id: id of the document.
// recovery_mode: decides how to handle document_id <=
// last_added_document_id. If in recovery_mode, then
@@ -60,10 +59,10 @@ class SectionIndexingHandler {
bool recovery_mode, PutDocumentStatsProto* put_document_stats) = 0;
protected:
- const Clock& clock_;
+ const Clock& clock_; // Does not own.
};
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_SECTION_INDEXING_HANDLER_H_
+#endif // ICING_INDEX_DATA_INDEXING_HANDLER_H_
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 86a0826..34988f5 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -21,44 +21,21 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/index/index.h"
-#include "icing/index/integer-section-indexing-handler.h"
-#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/string-section-indexing-handler.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
-#include "icing/transform/normalizer.h"
#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
-libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>>
-IndexProcessor::Create(const Normalizer* normalizer, Index* index,
- NumericIndex<int64_t>* integer_index, const Clock* clock,
- bool recovery_mode) {
- ICING_RETURN_ERROR_IF_NULL(normalizer);
- ICING_RETURN_ERROR_IF_NULL(index);
- ICING_RETURN_ERROR_IF_NULL(integer_index);
- ICING_RETURN_ERROR_IF_NULL(clock);
-
- std::vector<std::unique_ptr<SectionIndexingHandler>> handlers;
- handlers.push_back(
- std::make_unique<StringSectionIndexingHandler>(clock, normalizer, index));
- handlers.push_back(
- std::make_unique<IntegerSectionIndexingHandler>(clock, integer_index));
-
- return std::unique_ptr<IndexProcessor>(
- new IndexProcessor(std::move(handlers), clock, recovery_mode));
-}
-
libtextclassifier3::Status IndexProcessor::IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
PutDocumentStatsProto* put_document_stats) {
// TODO(b/259744228): set overall index latency.
- for (auto& section_indexing_handler : section_indexing_handlers_) {
- ICING_RETURN_IF_ERROR(section_indexing_handler->Handle(
+ for (auto& data_indexing_handler : data_indexing_handlers_) {
+ ICING_RETURN_IF_ERROR(data_indexing_handler->Handle(
tokenized_document, document_id, recovery_mode_, put_document_stats));
}
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 3d6b19a..9b96f00 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -20,12 +20,9 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/index/index.h"
-#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/section-indexing-handler.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
-#include "icing/transform/normalizer.h"
#include "icing/util/tokenized-document.h"
namespace icing {
@@ -33,24 +30,12 @@ namespace lib {
class IndexProcessor {
public:
- // Factory function to create an IndexProcessor which does not take ownership
- // of any input components, and all pointers must refer to valid objects that
- // outlive the created IndexProcessor instance.
- //
- // - recovery_mode: a flag indicates that if IndexProcessor is used to restore
- // index. Since there are several indices (term, integer) being restored at
- // the same time, we start with the minimum last added DocumentId of all
- // indices and replay documents to re-index, so it is possible to get some
- // previously indexed documents in the recovery mode. Therefore, we should
- // skip them without returning an error in recovery mode.
- //
- // Returns:
- // An IndexProcessor on success
- // FAILED_PRECONDITION if any of the pointers is null.
- static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create(
- const Normalizer* normalizer, Index* index,
- NumericIndex<int64_t>* integer_index_, const Clock* clock,
- bool recovery_mode = false);
+ explicit IndexProcessor(std::vector<std::unique_ptr<DataIndexingHandler>>&&
+ data_indexing_handlers,
+ const Clock* clock, bool recovery_mode = false)
+ : data_indexing_handlers_(std::move(data_indexing_handlers)),
+ clock_(*clock),
+ recovery_mode_(recovery_mode) {}
// Add tokenized document to the index, associated with document_id. If the
// number of tokens in the document exceeds max_tokens_per_document, then only
@@ -65,22 +50,14 @@ class IndexProcessor {
//
// Returns:
// - OK on success.
- // - Any SectionIndexingHandler errors.
+ // - Any DataIndexingHandler errors.
libtextclassifier3::Status IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
PutDocumentStatsProto* put_document_stats = nullptr);
private:
- explicit IndexProcessor(std::vector<std::unique_ptr<SectionIndexingHandler>>&&
- section_indexing_handlers,
- const Clock* clock, bool recovery_mode)
- : section_indexing_handlers_(std::move(section_indexing_handlers)),
- clock_(*clock),
- recovery_mode_(recovery_mode) {}
-
- std::vector<std::unique_ptr<SectionIndexingHandler>>
- section_indexing_handlers_;
- const Clock& clock_;
+ std::vector<std::unique_ptr<DataIndexingHandler>> data_indexing_handlers_;
+ const Clock& clock_; // Does not own.
bool recovery_mode_;
};
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 6608e44..ee43364 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -12,14 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index-processor.h"
#include "icing/index/index.h"
-#include "icing/index/numeric/dummy-numeric-index.h"
+#include "icing/index/integer-section-indexing-handler.h"
+#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/string-section-indexing-handler.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/schema-util.h"
@@ -173,6 +181,24 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
return schema_store;
}
+libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
+CreateDataIndexingHandlers(const Clock* clock, const Normalizer* normalizer,
+ Index* index, NumericIndex<int64_t>* integer_index) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(clock, normalizer, index));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(clock, integer_index));
+
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ return handlers;
+}
+
void CleanUp(const Filesystem& filesystem, const std::string& base_dir) {
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
}
@@ -198,7 +224,7 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
CreateIndex(icing_filesystem, filesystem, index_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
- DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
+ IntegerIndex::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -206,10 +232,14 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
Clock clock;
std::unique_ptr<SchemaStore> schema_store =
CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
+
DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0));
TokenizedDocument tokenized_document(std::move(
TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
@@ -268,7 +298,7 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
CreateIndex(icing_filesystem, filesystem, index_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
- DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
+ IntegerIndex::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -276,10 +306,13 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
Clock clock;
std::unique_ptr<SchemaStore> schema_store =
CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document =
CreateDocumentWithTenProperties(state.range(0));
@@ -340,7 +373,7 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
CreateIndex(icing_filesystem, filesystem, index_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
- DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
+ IntegerIndex::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -348,10 +381,13 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
Clock clock;
std::unique_ptr<SchemaStore> schema_store =
CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document =
CreateDocumentWithDiacriticLetters(state.range(0));
@@ -412,7 +448,7 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
CreateIndex(icing_filesystem, filesystem, index_dir);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
- DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir));
+ IntegerIndex::Create(filesystem, integer_index_dir));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -420,10 +456,13 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
Clock clock;
std::unique_ptr<SchemaStore> schema_store =
CreateSchemaStore(filesystem, &clock, base_dir);
+
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(),
- &clock));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers,
+ CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(),
+ integer_index.get()));
+ auto index_processor =
+ std::make_unique<IndexProcessor>(std::move(handlers), &clock);
DocumentProto input_document = CreateDocumentWithHiragana(state.range(0));
TokenizedDocument tokenized_document(std::move(
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index c22e8f0..3a9b4ee 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -30,12 +30,15 @@
#include "icing/absl_ports/str_join.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/string-section-indexing-handler.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
@@ -258,9 +261,21 @@ class IndexProcessorTest : public Test {
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock_, integer_index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>();
}
@@ -290,6 +305,7 @@ class IndexProcessorTest : public Test {
std::unique_ptr<LanguageSegmenter> lang_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<SchemaStore> schema_store_;
+
std::unique_ptr<IndexProcessor> index_processor_;
};
@@ -315,16 +331,6 @@ std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency(
return infos;
}
-TEST_F(IndexProcessorTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(IndexProcessor::Create(/*normalizer=*/nullptr, index_.get(),
- integer_index_.get(), &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
- EXPECT_THAT(IndexProcessor::Create(normalizer_.get(), /*index=*/nullptr,
- integer_index_.get(), &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-}
-
TEST_F(IndexProcessorTest, NoTermMatchTypeContent) {
DocumentProto document =
DocumentBuilder()
@@ -584,10 +590,15 @@ TEST_F(IndexProcessorTest, TooLongTokens) {
normalizer_factory::Create(
/*max_term_byte_size=*/4));
- ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(
+ &fake_clock_, normalizer.get(), index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
DocumentProto document =
DocumentBuilder()
@@ -769,10 +780,20 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_,
- /*recovery_mode=*/true));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock_, integer_index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+
+ IndexProcessor index_processor(std::move(handlers), &fake_clock_,
+ /*recovery_mode=*/true);
DocumentProto document =
DocumentBuilder()
@@ -785,7 +806,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
TokenizedDocument tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
document));
- EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId1),
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1),
IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
@@ -808,7 +829,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
tokenized_document,
TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
document));
- EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId0),
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId0),
IsOk());
// Verify that both index_ and integer_index_ are unchanged.
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
@@ -818,7 +839,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
IsOkAndHolds(integer_index_crc));
// As should indexing a document document_id == last_added_document_id.
- EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId1),
+ EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1),
IsOk());
// Verify that both index_ and integer_index_ are unchanged.
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
@@ -907,9 +928,16 @@ TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) {
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
+
DocumentId doc_id = 0;
// Have determined experimentally that indexing 3373 documents with this text
// will cause the LiteIndex to fill up. Further indexing will fail unless the
@@ -964,9 +992,15 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) {
Index::Create(options, &filesystem_, mock_icing_filesystem_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- integer_index_.get(), &fake_clock_));
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(),
+ index_.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+
+ index_processor_ =
+ std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_);
// 3. Index one document. This should fit in the LiteIndex without requiring a
// merge.
diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc
index 0ed01d3..d201a1a 100644
--- a/icing/index/integer-section-indexing-handler.cc
+++ b/icing/index/integer-section-indexing-handler.cc
@@ -14,8 +14,11 @@
#include "icing/index/integer-section-indexing-handler.h"
+#include <cstdint>
+#include <memory>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
-#include "icing/schema/section-manager.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/util/logging.h"
@@ -24,6 +27,17 @@
namespace icing {
namespace lib {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<IntegerSectionIndexingHandler>>
+IntegerSectionIndexingHandler::Create(const Clock* clock,
+ NumericIndex<int64_t>* integer_index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(integer_index);
+
+ return std::unique_ptr<IntegerSectionIndexingHandler>(
+ new IntegerSectionIndexingHandler(clock, integer_index));
+}
+
libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
diff --git a/icing/index/integer-section-indexing-handler.h b/icing/index/integer-section-indexing-handler.h
index d75815c..42ce07e 100644
--- a/icing/index/integer-section-indexing-handler.h
+++ b/icing/index/integer-section-indexing-handler.h
@@ -15,9 +15,13 @@
#ifndef ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
#define ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_
+#include <cstdint>
+#include <memory>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/numeric/numeric-index.h"
-#include "icing/index/section-indexing-handler.h"
#include "icing/store/document-id.h"
#include "icing/util/clock.h"
#include "icing/util/tokenized-document.h"
@@ -25,11 +29,18 @@
namespace icing {
namespace lib {
-class IntegerSectionIndexingHandler : public SectionIndexingHandler {
+class IntegerSectionIndexingHandler : public DataIndexingHandler {
public:
- explicit IntegerSectionIndexingHandler(const Clock* clock,
- NumericIndex<int64_t>* integer_index)
- : SectionIndexingHandler(clock), integer_index_(*integer_index) {}
+ // Creates an IntegerSectionIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created IntegerSectionIndexingHandler instance.
+ //
+ // Returns:
+ // - An IntegerSectionIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<IntegerSectionIndexingHandler>>
+ Create(const Clock* clock, NumericIndex<int64_t>* integer_index);
~IntegerSectionIndexingHandler() override = default;
@@ -46,7 +57,11 @@ class IntegerSectionIndexingHandler : public SectionIndexingHandler {
bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
- NumericIndex<int64_t>& integer_index_;
+ explicit IntegerSectionIndexingHandler(const Clock* clock,
+ NumericIndex<int64_t>* integer_index)
+ : DataIndexingHandler(clock), integer_index_(*integer_index) {}
+
+ NumericIndex<int64_t>& integer_index_; // Does not own.
};
} // namespace lib
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index 98c26ef..050a143 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -216,7 +216,7 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error. This could potentially leave the storages
- // in an invalid state and the caller should handle it property (e.g.
+ // in an invalid state and the caller should handle it properly (e.g.
// discard and rebuild)
libtextclassifier3::Status TransferIndex(
const std::vector<DocumentId>& document_id_old_to_new,
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index 7cd0909..83a2687 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -20,6 +20,7 @@
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/index.h"
#include "icing/legacy/core/icing-string-util.h"
@@ -34,6 +35,19 @@
namespace icing {
namespace lib {
+/* static */ libtextclassifier3::StatusOr<
+ std::unique_ptr<StringSectionIndexingHandler>>
+StringSectionIndexingHandler::Create(const Clock* clock,
+ const Normalizer* normalizer,
+ Index* index) {
+ ICING_RETURN_ERROR_IF_NULL(clock);
+ ICING_RETURN_ERROR_IF_NULL(normalizer);
+ ICING_RETURN_ERROR_IF_NULL(index);
+
+ return std::unique_ptr<StringSectionIndexingHandler>(
+ new StringSectionIndexingHandler(clock, normalizer, index));
+}
+
libtextclassifier3::Status StringSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
diff --git a/icing/index/string-section-indexing-handler.h b/icing/index/string-section-indexing-handler.h
index 36f6a05..6abfba5 100644
--- a/icing/index/string-section-indexing-handler.h
+++ b/icing/index/string-section-indexing-handler.h
@@ -15,9 +15,12 @@
#ifndef ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
#define ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_
+#include <memory>
+
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/data-indexing-handler.h"
#include "icing/index/index.h"
-#include "icing/index/section-indexing-handler.h"
#include "icing/proto/logging.pb.h"
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
@@ -27,14 +30,18 @@
namespace icing {
namespace lib {
-class StringSectionIndexingHandler : public SectionIndexingHandler {
+class StringSectionIndexingHandler : public DataIndexingHandler {
public:
- explicit StringSectionIndexingHandler(const Clock* clock,
- const Normalizer* normalizer,
- Index* index)
- : SectionIndexingHandler(clock),
- normalizer_(*normalizer),
- index_(*index) {}
+ // Creates a StringSectionIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created StringSectionIndexingHandler instance.
+ //
+ // Returns:
+ // - A StringSectionIndexingHandler instance on success
+ // - FAILED_PRECONDITION_ERROR if any of the input pointers is null
+ static libtextclassifier3::StatusOr<
+ std::unique_ptr<StringSectionIndexingHandler>>
+ Create(const Clock* clock, const Normalizer* normalizer, Index* index);
~StringSectionIndexingHandler() override = default;
@@ -57,8 +64,13 @@ class StringSectionIndexingHandler : public SectionIndexingHandler {
bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
- const Normalizer& normalizer_;
- Index& index_;
+ explicit StringSectionIndexingHandler(const Clock* clock,
+ const Normalizer* normalizer,
+ Index* index)
+ : DataIndexingHandler(clock), normalizer_(*normalizer), index_(*index) {}
+
+ const Normalizer& normalizer_; // Does not own.
+ Index& index_; // Does not own.
};
} // namespace lib