diff options
Diffstat (limited to 'icing/index')
-rw-r--r-- | icing/index/data-indexing-handler.h (renamed from icing/index/section-indexing-handler.h) | 25 | ||||
-rw-r--r-- | icing/index/index-processor.cc | 29 | ||||
-rw-r--r-- | icing/index/index-processor.h | 43 | ||||
-rw-r--r-- | icing/index/index-processor_benchmark.cc | 73 | ||||
-rw-r--r-- | icing/index/index-processor_test.cc | 94 | ||||
-rw-r--r-- | icing/index/integer-section-indexing-handler.cc | 16 | ||||
-rw-r--r-- | icing/index/integer-section-indexing-handler.h | 27 | ||||
-rw-r--r-- | icing/index/numeric/integer-index.h | 2 | ||||
-rw-r--r-- | icing/index/string-section-indexing-handler.cc | 14 | ||||
-rw-r--r-- | icing/index/string-section-indexing-handler.h | 32 |
10 files changed, 218 insertions, 137 deletions
diff --git a/icing/index/section-indexing-handler.h b/icing/index/data-indexing-handler.h index 98efc8f..0061b79 100644 --- a/icing/index/section-indexing-handler.h +++ b/icing/index/data-indexing-handler.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_INDEX_SECTION_INDEXING_HANDLER_H_ -#define ICING_INDEX_SECTION_INDEXING_HANDLER_H_ +#ifndef ICING_INDEX_DATA_INDEXING_HANDLER_H_ +#define ICING_INDEX_DATA_INDEXING_HANDLER_H_ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/proto/logging.pb.h" @@ -24,24 +24,23 @@ namespace icing { namespace lib { -// Parent class for indexing different types of sections in TokenizedDocument. -class SectionIndexingHandler { +// Parent class for indexing different types of data in TokenizedDocument. +class DataIndexingHandler { public: - explicit SectionIndexingHandler(const Clock* clock) : clock_(*clock) {} + explicit DataIndexingHandler(const Clock* clock) : clock_(*clock) {} - virtual ~SectionIndexingHandler() = default; + virtual ~DataIndexingHandler() = default; - // Handles the indexing process: add data (hits) into the specific type index - // (e.g. term index, integer index) for all contents in the corresponding type - // of sections in tokenized_document. + // Handles the indexing process: add data into the specific type index (e.g. + // term index, integer index, qualified id type joinable index) for all + // contents in the corresponding type of data in tokenized_document. // For example, IntegerSectionIndexingHandler::Handle should add data into // integer index for all contents in tokenized_document.integer_sections. // // Also it should handle last added DocumentId properly (based on // recovery_mode_) to avoid adding previously indexed documents. // - // tokenized_document: document object with different types of tokenized - // sections. + // tokenized_document: document object with different types of tokenized data. // document_id: id of the document. // recovery_mode: decides how to handle document_id <= // last_added_document_id. If in recovery_mode, then @@ -60,10 +59,10 @@ class SectionIndexingHandler { bool recovery_mode, PutDocumentStatsProto* put_document_stats) = 0; protected: - const Clock& clock_; + const Clock& clock_; // Does not own. }; } // namespace lib } // namespace icing -#endif // ICING_INDEX_SECTION_INDEXING_HANDLER_H_ +#endif // ICING_INDEX_DATA_INDEXING_HANDLER_H_ diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc index 86a0826..34988f5 100644 --- a/icing/index/index-processor.cc +++ b/icing/index/index-processor.cc @@ -21,44 +21,21 @@ #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" -#include "icing/index/index.h" -#include "icing/index/integer-section-indexing-handler.h" -#include "icing/index/numeric/numeric-index.h" -#include "icing/index/string-section-indexing-handler.h" +#include "icing/index/data-indexing-handler.h" #include "icing/proto/logging.pb.h" #include "icing/store/document-id.h" -#include "icing/transform/normalizer.h" #include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" namespace icing { namespace lib { -libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> -IndexProcessor::Create(const Normalizer* normalizer, Index* index, - NumericIndex<int64_t>* integer_index, const Clock* clock, - bool recovery_mode) { - ICING_RETURN_ERROR_IF_NULL(normalizer); - ICING_RETURN_ERROR_IF_NULL(index); - ICING_RETURN_ERROR_IF_NULL(integer_index); - ICING_RETURN_ERROR_IF_NULL(clock); - - std::vector<std::unique_ptr<SectionIndexingHandler>> handlers; - handlers.push_back( - std::make_unique<StringSectionIndexingHandler>(clock, normalizer, index)); - handlers.push_back( - std::make_unique<IntegerSectionIndexingHandler>(clock, integer_index)); - - return std::unique_ptr<IndexProcessor>( - new IndexProcessor(std::move(handlers), clock, recovery_mode)); -} - libtextclassifier3::Status IndexProcessor::IndexDocument( const TokenizedDocument& tokenized_document, DocumentId document_id, PutDocumentStatsProto* put_document_stats) { // TODO(b/259744228): set overall index latency. - for (auto& section_indexing_handler : section_indexing_handlers_) { - ICING_RETURN_IF_ERROR(section_indexing_handler->Handle( + for (auto& data_indexing_handler : data_indexing_handlers_) { + ICING_RETURN_IF_ERROR(data_indexing_handler->Handle( tokenized_document, document_id, recovery_mode_, put_document_stats)); } diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h index 3d6b19a..9b96f00 100644 --- a/icing/index/index-processor.h +++ b/icing/index/index-processor.h @@ -20,12 +20,9 @@ #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" -#include "icing/index/index.h" -#include "icing/index/numeric/numeric-index.h" -#include "icing/index/section-indexing-handler.h" +#include "icing/index/data-indexing-handler.h" #include "icing/proto/logging.pb.h" #include "icing/store/document-id.h" -#include "icing/transform/normalizer.h" #include "icing/util/tokenized-document.h" namespace icing { @@ -33,24 +30,12 @@ namespace lib { class IndexProcessor { public: - // Factory function to create an IndexProcessor which does not take ownership - // of any input components, and all pointers must refer to valid objects that - // outlive the created IndexProcessor instance. - // - // - recovery_mode: a flag indicates that if IndexProcessor is used to restore - // index. Since there are several indices (term, integer) being restored at - // the same time, we start with the minimum last added DocumentId of all - // indices and replay documents to re-index, so it is possible to get some - // previously indexed documents in the recovery mode. Therefore, we should - // skip them without returning an error in recovery mode. - // - // Returns: - // An IndexProcessor on success - // FAILED_PRECONDITION if any of the pointers is null. - static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create( - const Normalizer* normalizer, Index* index, - NumericIndex<int64_t>* integer_index_, const Clock* clock, - bool recovery_mode = false); + explicit IndexProcessor(std::vector<std::unique_ptr<DataIndexingHandler>>&& + data_indexing_handlers, + const Clock* clock, bool recovery_mode = false) + : data_indexing_handlers_(std::move(data_indexing_handlers)), + clock_(*clock), + recovery_mode_(recovery_mode) {} // Add tokenized document to the index, associated with document_id. If the // number of tokens in the document exceeds max_tokens_per_document, then only @@ -65,22 +50,14 @@ class IndexProcessor { // // Returns: // - OK on success. - // - Any SectionIndexingHandler errors. + // - Any DataIndexingHandler errors. libtextclassifier3::Status IndexDocument( const TokenizedDocument& tokenized_document, DocumentId document_id, PutDocumentStatsProto* put_document_stats = nullptr); private: - explicit IndexProcessor(std::vector<std::unique_ptr<SectionIndexingHandler>>&& - section_indexing_handlers, - const Clock* clock, bool recovery_mode) - : section_indexing_handlers_(std::move(section_indexing_handlers)), - clock_(*clock), - recovery_mode_(recovery_mode) {} - - std::vector<std::unique_ptr<SectionIndexingHandler>> - section_indexing_handlers_; - const Clock& clock_; + std::vector<std::unique_ptr<DataIndexingHandler>> data_indexing_handlers_; + const Clock& clock_; // Does not own. bool recovery_mode_; }; diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 6608e44..ee43364 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -12,14 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <memory> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "testing/base/public/benchmark.h" #include "gmock/gmock.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" +#include "icing/index/data-indexing-handler.h" #include "icing/index/index-processor.h" #include "icing/index/index.h" -#include "icing/index/numeric/dummy-numeric-index.h" +#include "icing/index/integer-section-indexing-handler.h" +#include "icing/index/numeric/integer-index.h" #include "icing/index/numeric/numeric-index.h" +#include "icing/index/string-section-indexing-handler.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/schema/schema-store.h" #include "icing/schema/schema-util.h" @@ -173,6 +181,24 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem, return schema_store; } +libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>> +CreateDataIndexingHandlers(const Clock* clock, const Normalizer* normalizer, + Index* index, NumericIndex<int64_t>* integer_index) { + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create(clock, normalizer, index)); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<IntegerSectionIndexingHandler> + integer_section_indexing_handler, + IntegerSectionIndexingHandler::Create(clock, integer_index)); + + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + handlers.push_back(std::move(integer_section_indexing_handler)); + return handlers; +} + void CleanUp(const Filesystem& filesystem, const std::string& base_dir) { filesystem.DeleteDirectoryRecursively(base_dir.c_str()); } @@ -198,7 +224,7 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -206,10 +232,14 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) { Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(filesystem, &clock, base_dir); + ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), - &clock)); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers, + CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(), + integer_index.get())); + auto index_processor = + std::make_unique<IndexProcessor>(std::move(handlers), &clock); + DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0)); TokenizedDocument tokenized_document(std::move( TokenizedDocument::Create(schema_store.get(), language_segmenter.get(), @@ -268,7 +298,7 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -276,10 +306,13 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) { Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(filesystem, &clock, base_dir); + ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), - &clock)); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers, + CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(), + integer_index.get())); + auto index_processor = + std::make_unique<IndexProcessor>(std::move(handlers), &clock); DocumentProto input_document = CreateDocumentWithTenProperties(state.range(0)); @@ -340,7 +373,7 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -348,10 +381,13 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) { Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(filesystem, &clock, base_dir); + ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), - &clock)); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers, + CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(), + integer_index.get())); + auto index_processor = + std::make_unique<IndexProcessor>(std::move(handlers), &clock); DocumentProto input_document = CreateDocumentWithDiacriticLetters(state.range(0)); @@ -412,7 +448,7 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - DummyNumericIndex<int64_t>::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -420,10 +456,13 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) { Clock clock; std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(filesystem, &clock, base_dir); + ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer.get(), index.get(), integer_index.get(), - &clock)); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers, + CreateDataIndexingHandlers(&clock, normalizer.get(), index.get(), + integer_index.get())); + auto index_processor = + std::make_unique<IndexProcessor>(std::move(handlers), &clock); DocumentProto input_document = CreateDocumentWithHiragana(state.range(0)); TokenizedDocument tokenized_document(std::move( diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index c22e8f0..3a9b4ee 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -30,12 +30,15 @@ #include "icing/absl_ports/str_join.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" +#include "icing/index/data-indexing-handler.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index.h" +#include "icing/index/integer-section-indexing-handler.h" #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/integer-index.h" #include "icing/index/numeric/numeric-index.h" +#include "icing/index/string-section-indexing-handler.h" #include "icing/index/term-property-id.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" @@ -258,9 +261,21 @@ class IndexProcessorTest : public Test { ICING_ASSERT_OK(schema_store_->SetSchema(schema)); ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), - integer_index_.get(), &fake_clock_)); + std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(), + index_.get())); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler> + integer_section_indexing_handler, + IntegerSectionIndexingHandler::Create( + &fake_clock_, integer_index_.get())); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + handlers.push_back(std::move(integer_section_indexing_handler)); + + index_processor_ = + std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_); + mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>(); } @@ -290,6 +305,7 @@ class IndexProcessorTest : public Test { std::unique_ptr<LanguageSegmenter> lang_segmenter_; std::unique_ptr<Normalizer> normalizer_; std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<IndexProcessor> index_processor_; }; @@ -315,16 +331,6 @@ std::vector<DocHitInfoTermFrequencyPair> GetHitsWithTermFrequency( return infos; } -TEST_F(IndexProcessorTest, CreationWithNullPointerShouldFail) { - EXPECT_THAT(IndexProcessor::Create(/*normalizer=*/nullptr, index_.get(), - integer_index_.get(), &fake_clock_), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - - EXPECT_THAT(IndexProcessor::Create(normalizer_.get(), /*index=*/nullptr, - integer_index_.get(), &fake_clock_), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); -} - TEST_F(IndexProcessorTest, NoTermMatchTypeContent) { DocumentProto document = DocumentBuilder() @@ -584,10 +590,15 @@ TEST_F(IndexProcessorTest, TooLongTokens) { normalizer_factory::Create( /*max_term_byte_size=*/4)); - ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer.get(), index_.get(), - integer_index_.get(), &fake_clock_)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create( + &fake_clock_, normalizer.get(), index_.get())); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + + index_processor_ = + std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_); DocumentProto document = DocumentBuilder() @@ -769,10 +780,20 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) { TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) { ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<IndexProcessor> index_processor, - IndexProcessor::Create(normalizer_.get(), index_.get(), - integer_index_.get(), &fake_clock_, - /*recovery_mode=*/true)); + std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(), + index_.get())); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler> + integer_section_indexing_handler, + IntegerSectionIndexingHandler::Create( + &fake_clock_, integer_index_.get())); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + handlers.push_back(std::move(integer_section_indexing_handler)); + + IndexProcessor index_processor(std::move(handlers), &fake_clock_, + /*recovery_mode=*/true); DocumentProto document = DocumentBuilder() @@ -785,7 +806,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) { TokenizedDocument tokenized_document, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), document)); - EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId1), + EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1), IsOk()); EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); @@ -808,7 +829,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) { tokenized_document, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), document)); - EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId0), + EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId0), IsOk()); // Verify that both index_ and integer_index_ are unchanged. EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); @@ -818,7 +839,7 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) { IsOkAndHolds(integer_index_crc)); // As should indexing a document document_id == last_added_document_id. - EXPECT_THAT(index_processor->IndexDocument(tokenized_document, kDocumentId1), + EXPECT_THAT(index_processor.IndexDocument(tokenized_document, kDocumentId1), IsOk()); // Verify that both index_ and integer_index_ are unchanged. EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1)); @@ -907,9 +928,16 @@ TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) { index_, Index::Create(options, &filesystem_, &icing_filesystem_)); ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), - integer_index_.get(), &fake_clock_)); + std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(), + index_.get())); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + + index_processor_ = + std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_); + DocumentId doc_id = 0; // Have determined experimentally that indexing 3373 documents with this text // will cause the LiteIndex to fill up. Further indexing will fail unless the @@ -964,9 +992,15 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) { Index::Create(options, &filesystem_, mock_icing_filesystem_.get())); ICING_ASSERT_OK_AND_ASSIGN( - index_processor_, - IndexProcessor::Create(normalizer_.get(), index_.get(), - integer_index_.get(), &fake_clock_)); + std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create(&fake_clock_, normalizer_.get(), + index_.get())); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + + index_processor_ = + std::make_unique<IndexProcessor>(std::move(handlers), &fake_clock_); // 3. Index one document. This should fit in the LiteIndex without requiring a // merge. diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc index 0ed01d3..d201a1a 100644 --- a/icing/index/integer-section-indexing-handler.cc +++ b/icing/index/integer-section-indexing-handler.cc @@ -14,8 +14,11 @@ #include "icing/index/integer-section-indexing-handler.h" +#include <cstdint> +#include <memory> + #include "icing/text_classifier/lib3/utils/base/status.h" -#include "icing/schema/section-manager.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/util/logging.h" @@ -24,6 +27,17 @@ namespace icing { namespace lib { +/* static */ libtextclassifier3::StatusOr< + std::unique_ptr<IntegerSectionIndexingHandler>> +IntegerSectionIndexingHandler::Create(const Clock* clock, + NumericIndex<int64_t>* integer_index) { + ICING_RETURN_ERROR_IF_NULL(clock); + ICING_RETURN_ERROR_IF_NULL(integer_index); + + return std::unique_ptr<IntegerSectionIndexingHandler>( + new IntegerSectionIndexingHandler(clock, integer_index)); +} + libtextclassifier3::Status IntegerSectionIndexingHandler::Handle( const TokenizedDocument& tokenized_document, DocumentId document_id, bool recovery_mode, PutDocumentStatsProto* put_document_stats) { diff --git a/icing/index/integer-section-indexing-handler.h b/icing/index/integer-section-indexing-handler.h index d75815c..42ce07e 100644 --- a/icing/index/integer-section-indexing-handler.h +++ b/icing/index/integer-section-indexing-handler.h @@ -15,9 +15,13 @@ #ifndef ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_ #define ICING_INDEX_INTEGER_SECTION_INDEXING_HANDLER_H_ +#include <cstdint> +#include <memory> + #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/index/data-indexing-handler.h" #include "icing/index/numeric/numeric-index.h" -#include "icing/index/section-indexing-handler.h" #include "icing/store/document-id.h" #include "icing/util/clock.h" #include "icing/util/tokenized-document.h" @@ -25,11 +29,18 @@ namespace icing { namespace lib { -class IntegerSectionIndexingHandler : public SectionIndexingHandler { +class IntegerSectionIndexingHandler : public DataIndexingHandler { public: - explicit IntegerSectionIndexingHandler(const Clock* clock, - NumericIndex<int64_t>* integer_index) - : SectionIndexingHandler(clock), integer_index_(*integer_index) {} + // Creates an IntegerSectionIndexingHandler instance which does not take + // ownership of any input components. All pointers must refer to valid objects + // that outlive the created IntegerSectionIndexingHandler instance. + // + // Returns: + // - An IntegerSectionIndexingHandler instance on success + // - FAILED_PRECONDITION_ERROR if any of the input pointer is null + static libtextclassifier3::StatusOr< + std::unique_ptr<IntegerSectionIndexingHandler>> + Create(const Clock* clock, NumericIndex<int64_t>* integer_index); ~IntegerSectionIndexingHandler() override = default; @@ -46,7 +57,11 @@ class IntegerSectionIndexingHandler : public SectionIndexingHandler { bool recovery_mode, PutDocumentStatsProto* put_document_stats) override; private: - NumericIndex<int64_t>& integer_index_; + explicit IntegerSectionIndexingHandler(const Clock* clock, + NumericIndex<int64_t>* integer_index) + : DataIndexingHandler(clock), integer_index_(*integer_index) {} + + NumericIndex<int64_t>& integer_index_; // Does not own. }; } // namespace lib diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h index 98c26ef..050a143 100644 --- a/icing/index/numeric/integer-index.h +++ b/icing/index/numeric/integer-index.h @@ -216,7 +216,7 @@ class IntegerIndex : public NumericIndex<int64_t> { // Returns: // - OK on success // - INTERNAL_ERROR on I/O error. This could potentially leave the storages - // in an invalid state and the caller should handle it property (e.g. + // in an invalid state and the caller should handle it properly (e.g. // discard and rebuild) libtextclassifier3::Status TransferIndex( const std::vector<DocumentId>& document_id_old_to_new, diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc index 7cd0909..83a2687 100644 --- a/icing/index/string-section-indexing-handler.cc +++ b/icing/index/string-section-indexing-handler.cc @@ -20,6 +20,7 @@ #include <string_view> #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/index/index.h" #include "icing/legacy/core/icing-string-util.h" @@ -34,6 +35,19 @@ namespace icing { namespace lib { +/* static */ libtextclassifier3::StatusOr< + std::unique_ptr<StringSectionIndexingHandler>> +StringSectionIndexingHandler::Create(const Clock* clock, + const Normalizer* normalizer, + Index* index) { + ICING_RETURN_ERROR_IF_NULL(clock); + ICING_RETURN_ERROR_IF_NULL(normalizer); + ICING_RETURN_ERROR_IF_NULL(index); + + return std::unique_ptr<StringSectionIndexingHandler>( + new StringSectionIndexingHandler(clock, normalizer, index)); +} + libtextclassifier3::Status StringSectionIndexingHandler::Handle( const TokenizedDocument& tokenized_document, DocumentId document_id, bool recovery_mode, PutDocumentStatsProto* put_document_stats) { diff --git a/icing/index/string-section-indexing-handler.h b/icing/index/string-section-indexing-handler.h index 36f6a05..6abfba5 100644 --- a/icing/index/string-section-indexing-handler.h +++ b/icing/index/string-section-indexing-handler.h @@ -15,9 +15,12 @@ #ifndef ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_ #define ICING_INDEX_STRING_SECTION_INDEXING_HANDLER_H_ +#include <memory> + #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/index/data-indexing-handler.h" #include "icing/index/index.h" -#include "icing/index/section-indexing-handler.h" #include "icing/proto/logging.pb.h" #include "icing/store/document-id.h" #include "icing/transform/normalizer.h" @@ -27,14 +30,18 @@ namespace icing { namespace lib { -class StringSectionIndexingHandler : public SectionIndexingHandler { +class StringSectionIndexingHandler : public DataIndexingHandler { public: - explicit StringSectionIndexingHandler(const Clock* clock, - const Normalizer* normalizer, - Index* index) - : SectionIndexingHandler(clock), - normalizer_(*normalizer), - index_(*index) {} + // Creates a StringSectionIndexingHandler instance which does not take + // ownership of any input components. All pointers must refer to valid objects + // that outlive the created StringSectionIndexingHandler instance. + // + // Returns: + // - A StringSectionIndexingHandler instance on success + // - FAILED_PRECONDITION_ERROR if any of the input pointer is null + static libtextclassifier3::StatusOr< + std::unique_ptr<StringSectionIndexingHandler>> + Create(const Clock* clock, const Normalizer* normalizer, Index* index); ~StringSectionIndexingHandler() override = default; @@ -57,8 +64,13 @@ class StringSectionIndexingHandler : public SectionIndexingHandler { bool recovery_mode, PutDocumentStatsProto* put_document_stats) override; private: - const Normalizer& normalizer_; - Index& index_; + explicit StringSectionIndexingHandler(const Clock* clock, + const Normalizer* normalizer, + Index* index) + : DataIndexingHandler(clock), normalizer_(*normalizer), index_(*index) {} + + const Normalizer& normalizer_; // Does not own. + Index& index_; // Does not own. }; } // namespace lib |