diff options
author | Cassie Wang <cassiewang@google.com> | 2020-07-21 14:26:01 -0700 |
---|---|---|
committer | Cassie Wang <cassiewang@google.com> | 2020-07-24 14:19:26 -0700 |
commit | c994b6ea30c9be8976da0b1bf6a8923907ff903f (patch) | |
tree | f69970caf14ff26cfea1f3a573c2c27a7cd83e3d /icing/index/lite | |
parent | 09d66401215254a2bdfc134009039636054d28d2 (diff) | |
download | icing-c994b6ea30c9be8976da0b1bf6a8923907ff903f.tar.gz |
Pull upstream changes.
Change-Id: Ieed20fd00a7c00778045434ae1b7c9e019a6c369
Diffstat (limited to 'icing/index/lite')
-rw-r--r-- | icing/index/lite/lite-index.cc | 457 | ||||
-rw-r--r-- | icing/index/lite/lite-index.h | 269 |
2 files changed, 726 insertions, 0 deletions
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc new file mode 100644 index 0000000..a72402e --- /dev/null +++ b/icing/index/lite/lite-index.cc @@ -0,0 +1,457 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/index/lite/lite-index.h" + +#include <inttypes.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/mman.h> + +#include <algorithm> +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/hit/hit.h" +#include "icing/index/term-property-id.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/legacy/core/icing-timer.h" +#include "icing/legacy/index/icing-array-storage.h" +#include "icing/legacy/index/icing-dynamic-trie.h" +#include "icing/legacy/index/icing-filesystem.h" +#include "icing/legacy/index/icing-lite-index-header.h" +#include "icing/legacy/index/icing-mmapper.h" +#include "icing/proto/term.pb.h" +#include "icing/schema/section.h" +#include "icing/store/document-id.h" +#include "icing/util/crc32.h" +#include "icing/util/logging.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +namespace { + +// Point at which we declare the trie full. +constexpr double kTrieFullFraction = 0.95; + +std::string MakeHitBufferFilename(const std::string& filename_base) { + return filename_base + "hb"; +} + +size_t header_size() { return sizeof(IcingLiteIndex_HeaderImpl::HeaderData); } + +} // namespace + +const LiteIndex::Element::Value LiteIndex::Element::kInvalidValue = + LiteIndex::Element(0, Hit()).value(); + +libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> LiteIndex::Create( + const LiteIndex::Options& options, const IcingFilesystem* filesystem) { + ICING_RETURN_ERROR_IF_NULL(filesystem); + + std::unique_ptr<LiteIndex> lite_index = + std::unique_ptr<LiteIndex>(new LiteIndex(options, filesystem)); + ICING_RETURN_IF_ERROR(lite_index->Initialize()); + return std::move(lite_index); +} + +// size is max size in elements. An appropriate lexicon and display +// mapping size will be chosen based on hit buffer size. +LiteIndex::LiteIndex(const LiteIndex::Options& options, + const IcingFilesystem* filesystem) + : hit_buffer_(*filesystem), + hit_buffer_crc_(0), + lexicon_(options.filename_base + "lexicon", MakeTrieRuntimeOptions(), + filesystem), + header_mmap_(false, MAP_SHARED), + options_(options), + filesystem_(filesystem) {} + +LiteIndex::~LiteIndex() { + if (initialized()) { + libtextclassifier3::Status unused = PersistToDisk(); + } +} + +IcingDynamicTrie::RuntimeOptions LiteIndex::MakeTrieRuntimeOptions() { + return IcingDynamicTrie::RuntimeOptions().set_storage_policy( + IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc); +} + +libtextclassifier3::Status LiteIndex::Initialize() { + // Size of hit buffer's header struct, rounded up to the nearest number of + // system memory pages. + const size_t header_padded_size = + IcingMMapper::page_aligned_size(header_size()); + + // Variable declarations cannot cross goto jumps, so declare these up top. + libtextclassifier3::Status status; + uint64_t file_size; + IcingTimer timer; + + if (!lexicon_.CreateIfNotExist(options_.lexicon_options) || + !lexicon_.Init()) { + return absl_ports::InternalError("Failed to initialize lexicon trie"); + } + + hit_buffer_fd_.reset(filesystem_->OpenForWrite( + MakeHitBufferFilename(options_.filename_base).c_str())); + if (!hit_buffer_fd_.is_valid()) { + status = absl_ports::InternalError("Failed to open hit buffer file"); + goto error; + } + + file_size = filesystem_->GetFileSize(hit_buffer_fd_.get()); + if (file_size == IcingFilesystem::kBadFileSize) { + status = absl_ports::InternalError("Failed to query hit buffer file size"); + goto error; + } + + if (file_size < header_padded_size) { + if (file_size != 0) { + status = absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Hit buffer had unexpected size %" PRIu64, file_size)); + goto error; + } + + ICING_VLOG(2) << "Creating new hit buffer"; + // Make sure files are fresh. + if (!lexicon_.Remove() || + !lexicon_.CreateIfNotExist(options_.lexicon_options) || + !lexicon_.Init()) { + status = + absl_ports::InternalError("Failed to refresh lexicon during clear"); + goto error; + } + + // Create fresh hit buffer by first emptying the hit buffer file and then + // allocating header_padded_size of the cleared space. + if (!filesystem_->Truncate(hit_buffer_fd_.get(), 0) || + !filesystem_->Truncate(hit_buffer_fd_.get(), header_padded_size)) { + status = absl_ports::InternalError("Failed to truncate hit buffer file"); + goto error; + } + + // Set up header. + header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size()); + header_ = std::make_unique<IcingLiteIndex_HeaderImpl>( + reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>( + header_mmap_.address())); + header_->Reset(); + + if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true, + sizeof(Element::Value), header_->cur_size(), + options_.hit_buffer_size, &hit_buffer_crc_, true)) { + status = absl_ports::InternalError("Failed to initialize new hit buffer"); + goto error; + } + + UpdateChecksum(); + } else { + header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size()); + header_ = std::make_unique<IcingLiteIndex_HeaderImpl>( + reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>( + header_mmap_.address())); + + if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true, + sizeof(Element::Value), header_->cur_size(), + options_.hit_buffer_size, &hit_buffer_crc_, true)) { + status = absl_ports::InternalError( + "Failed to re-initialize existing hit buffer"); + goto error; + } + + // Check integrity. + if (!header_->check_magic()) { + status = absl_ports::InternalError("Lite index header magic mismatch"); + goto error; + } + Crc32 crc = ComputeChecksum(); + if (crc.Get() != header_->lite_index_crc()) { + status = absl_ports::DataLossError( + IcingStringUtil::StringPrintf("Lite index crc check failed: %u vs %u", + crc.Get(), header_->lite_index_crc())); + goto error; + } + } + + ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index init ok in %.3fms", + timer.Elapsed() * 1000); + return status; + +error: + header_ = nullptr; + header_mmap_.Unmap(); + lexicon_.Close(); + hit_buffer_crc_ = 0; + hit_buffer_.Reset(); + hit_buffer_fd_.reset(); + if (status.ok()) { + return absl_ports::InternalError( + "Error handling code ran but status was ok"); + } + return status; +} + +Crc32 LiteIndex::ComputeChecksum() { + IcingTimer timer; + + // Update crcs. + uint32_t dependent_crcs[2]; + hit_buffer_.UpdateCrc(); + dependent_crcs[0] = hit_buffer_crc_; + dependent_crcs[1] = lexicon_.UpdateCrc(); + + // Compute the master crc. + + // Header crc, excluding the actual crc field. + Crc32 all_crc(header_->CalculateHeaderCrc()); + all_crc.Append(std::string_view(reinterpret_cast<const char*>(dependent_crcs), + sizeof(dependent_crcs))); + ICING_VLOG(2) << IcingStringUtil::StringPrintf( + "Lite index crc computed in %.3fms", timer.Elapsed() * 1000); + + return all_crc; +} + +libtextclassifier3::Status LiteIndex::Reset() { + IcingTimer timer; + + // TODO(b/140436942): When these components have been changed to return errors + // they should be propagated from here. + lexicon_.Clear(); + hit_buffer_.Clear(); + header_->Reset(); + UpdateChecksum(); + + ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index clear in %.3fms", + timer.Elapsed() * 1000); + return libtextclassifier3::Status::OK; +} + +void LiteIndex::Warm() { + hit_buffer_.Warm(); + lexicon_.Warm(); +} + +libtextclassifier3::Status LiteIndex::PersistToDisk() { + bool success = true; + if (!lexicon_.Sync()) { + ICING_VLOG(1) << "Failed to sync the lexicon."; + success = false; + } + hit_buffer_.Sync(); + UpdateChecksum(); + header_mmap_.Sync(); + + return (success) ? libtextclassifier3::Status::OK + : absl_ports::InternalError( + "Unable to sync lite index components."); +} + +void LiteIndex::UpdateChecksum() { + header_->set_lite_index_crc(ComputeChecksum().Get()); +} + +libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm( + const std::string& term, TermMatchType::Code term_match_type, + NamespaceId namespace_id) { + uint32_t tvi; + if (!lexicon_.Insert(term.c_str(), "", &tvi, false)) { + return absl_ports::ResourceExhaustedError( + absl_ports::StrCat("Unable to add term ", term, " to lexicon!")); + } + ICING_RETURN_IF_ERROR(UpdateTermProperties( + tvi, term_match_type == TermMatchType::PREFIX, namespace_id)); + return tvi; +} + +libtextclassifier3::Status LiteIndex::UpdateTermProperties( + uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) { + if (hasPrefixHits && + !lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) { + return absl_ports::ResourceExhaustedError( + "Insufficient disk space to create prefix property!"); + } + + if (!lexicon_.SetProperty(tvi, GetNamespacePropertyId(namespace_id))) { + return absl_ports::ResourceExhaustedError( + "Insufficient disk space to create namespace property!"); + } + + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) { + if (is_full()) { + return absl_ports::ResourceExhaustedError("Hit buffer is full!"); + } + + header_->set_last_added_docid(hit.document_id()); + + Element elt(term_id, hit); + uint32_t cur_size = header_->cur_size(); + Element::Value* valp = hit_buffer_.GetMutableMem<Element::Value>(cur_size, 1); + if (valp == nullptr) { + return absl_ports::ResourceExhaustedError( + "Allocating more space in hit buffer failed!"); + } + *valp = elt.value(); + header_->set_cur_size(cur_size + 1); + + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::StatusOr<uint32_t> LiteIndex::FindTerm( + const std::string& term) const { + char dummy; + uint32_t tvi; + if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) { + return absl_ports::NotFoundError( + absl_ports::StrCat("Could not find ", term, " in the lexicon.")); + } + return tvi; +} + +uint32_t LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, + bool only_from_prefix_sections, + std::vector<DocHitInfo>* hits_out) { + uint32_t count = 0; + DocumentId last_document_id = kInvalidDocumentId; + for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) { + Element elt(hit_buffer_.array_cast<Element>()[idx]); + if (elt.term_id() != term_id) break; + + const Hit& hit = elt.hit(); + // Check sections. + if (((1u << hit.section_id()) & section_id_mask) == 0) { + continue; + } + // Check prefix section only. + if (only_from_prefix_sections && !hit.is_in_prefix_section()) { + continue; + } + DocumentId document_id = hit.document_id(); + if (document_id != last_document_id) { + count++; + if (hits_out != nullptr) { + hits_out->push_back(DocHitInfo(document_id)); + } + last_document_id = document_id; + } + if (hits_out != nullptr) { + hits_out->back().UpdateSection(hit.section_id(), hit.score()); + } + } + return count; +} + +uint32_t LiteIndex::CountHits(uint32_t term_id) { + return AppendHits(term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + /*hits_out=*/nullptr); +} + +bool LiteIndex::is_full() const { + return (header_->cur_size() == options_.hit_buffer_size || + lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction)); +} + +void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const { + absl_ports::StrAppend( + out, IcingStringUtil::StringPrintf("Lite Index\nHit buffer %u/%u\n", + header_->cur_size(), + options_.hit_buffer_size)); + + // Lexicon. + out->append("Lexicon stats:\n"); + lexicon_.GetDebugInfo(verbosity, out); +} + +libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const { + int64_t header_and_hit_buffer_file_size = + filesystem_->GetFileSize(hit_buffer_fd_.get()); + + if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError( + "Failed to get element size of the LiteIndex's header and hit buffer"); + } + + int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); + if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) { + return absl_ports::InternalError( + "Failed to get element size of LiteIndex's lexicon"); + } + + // On initialization, we grow the file to a padded size first. So this size + // won't count towards the size taken up by elements + size_t header_padded_size = IcingMMapper::page_aligned_size(header_size()); + + return header_and_hit_buffer_file_size - header_padded_size + + lexicon_disk_usage; +} + +uint32_t LiteIndex::Seek(uint32_t term_id) { + // Make searchable by sorting by hit buffer. + uint32_t sort_len = header_->cur_size() - header_->searchable_end(); + if (sort_len > 0) { + IcingTimer timer; + + auto* array_start = + hit_buffer_.GetMutableMem<Element::Value>(0, header_->cur_size()); + Element::Value* sort_start = array_start + header_->searchable_end(); + std::sort(sort_start, array_start + header_->cur_size()); + + // Now merge with previous region. Since the previous region is already + // sorted and deduplicated, optimize the merge by skipping everything less + // than the new region's smallest value. + if (header_->searchable_end() > 0) { + std::inplace_merge(array_start, array_start + header_->searchable_end(), + array_start + header_->cur_size()); + } + ICING_VLOG(2) << IcingStringUtil::StringPrintf( + "Lite index sort and merge %u into %u in %.3fms", sort_len, + header_->searchable_end(), timer.Elapsed() * 1000); + + // Now the entire array is sorted. + header_->set_searchable_end(header_->cur_size()); + + // Update crc in-line. + UpdateChecksum(); + } + + // Binary search for our term_id. Make sure we get the first + // element. Using kBeginSortValue ensures this for the hit value. + Element elt(term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kMaxHitScore)); + + const Element::Value* array = hit_buffer_.array_cast<Element::Value>(); + const Element::Value* ptr = + std::lower_bound(array, array + header_->cur_size(), elt.value()); + return ptr - array; +} + +} // namespace lib +} // namespace icing diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h new file mode 100644 index 0000000..b60a947 --- /dev/null +++ b/icing/index/lite/lite-index.h @@ -0,0 +1,269 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A small index with continuous updates (doesn't need explicit Flush +// to persiste) but has more possibility for corruption. It can always +// detect corruption reliably. + +#ifndef ICING_INDEX_LITE_INDEX_H_ +#define ICING_INDEX_LITE_INDEX_H_ + +#include <cstdint> +#include <limits> +#include <memory> +#include <string> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/filesystem.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/hit/hit.h" +#include "icing/legacy/index/icing-array-storage.h" +#include "icing/legacy/index/icing-dynamic-trie.h" +#include "icing/legacy/index/icing-filesystem.h" +#include "icing/legacy/index/icing-lite-index-header.h" +#include "icing/legacy/index/icing-lite-index-options.h" +#include "icing/legacy/index/icing-mmapper.h" +#include "icing/proto/term.pb.h" +#include "icing/schema/section.h" +#include "icing/store/document-id.h" +#include "icing/store/namespace-id.h" +#include "icing/util/bit-util.h" +#include "icing/util/crc32.h" + +namespace icing { +namespace lib { + +class LiteIndex { + public: + // An entry in the hit buffer. + class Element { + public: + // Layout bits: 24 termid + 32 hit value + 8 hit score. + using Value = uint64_t; + + static constexpr int kTermIdBits = 24; + static constexpr int kHitValueBits = sizeof(Hit::Value) * 8; + static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8; + + static const Value kInvalidValue; + + explicit Element(Value v = kInvalidValue) : value_(v) {} + + Element(uint32_t term_id, const Hit& hit) { + static_assert( + kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8, + "LiteIndexElementTooBig"); + + value_ = 0; + // Term id goes into the most significant bits because it takes + // precedent in sorts. + bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits, + &value_); + bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_); + bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_); + } + + uint32_t term_id() const { + return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits, + kTermIdBits); + } + + Hit hit() const { + return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits), + bit_util::BitfieldGet(value_, 0, kHitScoreBits)); + } + + Value value() const { return value_; } + + private: + Value value_; + }; + + using Options = IcingLiteIndexOptions; + + // Updates checksum of subcomponents. + ~LiteIndex(); + + // Creates lite index from storage. The files will be created if they do not + // already exist. + // + // Returns: + // OK on success + // DATA_LOSS if the index was corrupted and cleared + // INTERNAL on I/O error + static libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> Create( + const Options& options, const IcingFilesystem* filesystem); + + // Resets all internal members of the index. Returns OK if all operations were + // successful. + libtextclassifier3::Status Reset(); + + // Advises the OS to cache pages in the index, which will be accessed for a + // query soon. + void Warm(); + + // Syncs all modified files in the index to disk. + // + // Returns: + // OK on success + // INTERNAL on I/O error + libtextclassifier3::Status PersistToDisk(); + + // Calculate the checksum of all sub-components of the LiteIndex + Crc32 ComputeChecksum(); + + // Returns term_id if term found, NOT_FOUND otherwise. + libtextclassifier3::StatusOr<uint32_t> FindTerm( + const std::string& term) const; + + // Returns an iterator for all terms for which 'prefix' is a prefix. + class PrefixIterator { + public: + explicit PrefixIterator(const IcingDynamicTrie::Iterator& delegate) + : delegate_(delegate) {} + bool IsValid() const { return delegate_.IsValid(); } + + void Advance() { delegate_.Advance(); } + + const char* GetKey() const { return delegate_.GetKey(); } + + uint32_t GetValueIndex() const { return delegate_.GetValueIndex(); } + + private: + IcingDynamicTrie::Iterator delegate_; + }; + + PrefixIterator FindTermPrefixes(const std::string& prefix) const { + return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str())); + } + + // Inserts a term with its properties. + // + // Returns: + // A value index on success + // RESOURCE_EXHAUSTED if lexicon is full or no disk space is available + libtextclassifier3::StatusOr<uint32_t> InsertTerm( + const std::string& term, TermMatchType::Code term_match_type, + NamespaceId namespace_id); + + // Updates term properties by setting hasPrefixHits and namespace id of the + // term. + // + // Returns: + // OK on success + // RESOURCE_EXHAUSTED if no disk space is available + libtextclassifier3::Status UpdateTermProperties(uint32_t tvi, + bool hasPrefixHits, + NamespaceId namespace_id); + + // Append hit to buffer. term_id must be encoded using the same term_id_codec + // supplied to the index constructor. Returns non-OK if hit cannot be added + // (either due to hit buffer or file system capacity reached). + libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit); + + // Add all hits with term_id from the sections specified in section_id_mask, + // skipping hits in non-prefix sections if only_from_prefix_sections is true, + // to hits_out. + uint32_t AppendHits(uint32_t term_id, SectionIdMask section_id_mask, + bool only_from_prefix_sections, + std::vector<DocHitInfo>* hits_out); + + // Returns the hit count of the term. + uint32_t CountHits(uint32_t term_id); + + // Check if buffer has reached its capacity. + bool is_full() const; + + constexpr static uint32_t max_hit_buffer_size() { + return std::numeric_limits<uint32_t>::max() / sizeof(LiteIndex::Element); + } + + // We keep track of the last added document_id. This is always the largest + // document_id that has been added because hits can only be added in order of + // increasing document_id. + DocumentId last_added_document_id() const { + return header_->last_added_docid(); + } + + const IcingDynamicTrie& lexicon() const { return lexicon_; } + + // Returns debug information for the index in out. + // verbosity <= 0, simplest debug information - size of lexicon, hit buffer + // verbosity > 0, more detailed debug information from the lexicon. + void GetDebugInfo(int verbosity, std::string* out) const; + + // Returns the byte size of all the elements held in the index. This excludes + // the size of any internal metadata of the index, e.g. the index's header. + // + // Returns: + // Byte size on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetElementsSize() const; + + private: + static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions(); + + LiteIndex(const Options& options, const IcingFilesystem* filesystem); + + // Initializes lite index from storage. Must be called exactly once after + // object construction. + // + // Returns: + // OK on success + // DATA_LOSS if the index was corrupted and cleared + // INTERNAL on I/O error + libtextclassifier3::Status Initialize(); + + bool initialized() const { return header_ != nullptr; } + + // Sets the computed checksum in the header + void UpdateChecksum(); + + // Returns the position of the first element with term_id, or the size of the + // hit buffer if term_id is not present. + uint32_t Seek(uint32_t term_id); + + // File descriptor that points to where the header and hit buffer are written + // to. + ScopedFd hit_buffer_fd_; + + // Mmapped region past the header that stores the hits. + IcingArrayStorage hit_buffer_; + + // Crc checksum of the hits, excludes the header. + uint32_t hit_buffer_crc_; + + // Trie that maps indexed terms to their term id + IcingDynamicTrie lexicon_; + + // TODO(b/140437260): Port over to MemoryMappedFile + // Memory mapped region of the underlying file that reflects the header. + IcingMMapper header_mmap_; + + // Wrapper around the mmapped header that contains stats on the lite index. + std::unique_ptr<IcingLiteIndex_Header> header_; + + // Options used to initialize the LiteIndex. + const Options options_; + + // TODO(b/139087650) Move to icing::Filesystem + const IcingFilesystem* const filesystem_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_INDEX_LITE_INDEX_H_ |