aboutsummaryrefslogtreecommitdiff
path: root/icing/index/lite
diff options
context:
space:
mode:
authorCassie Wang <cassiewang@google.com>2020-07-21 14:26:01 -0700
committerCassie Wang <cassiewang@google.com>2020-07-24 14:19:26 -0700
commitc994b6ea30c9be8976da0b1bf6a8923907ff903f (patch)
treef69970caf14ff26cfea1f3a573c2c27a7cd83e3d /icing/index/lite
parent09d66401215254a2bdfc134009039636054d28d2 (diff)
downloadicing-c994b6ea30c9be8976da0b1bf6a8923907ff903f.tar.gz
Pull upstream changes.
Change-Id: Ieed20fd00a7c00778045434ae1b7c9e019a6c369
Diffstat (limited to 'icing/index/lite')
-rw-r--r--icing/index/lite/lite-index.cc457
-rw-r--r--icing/index/lite/lite-index.h269
2 files changed, 726 insertions, 0 deletions
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
new file mode 100644
index 0000000..a72402e
--- /dev/null
+++ b/icing/index/lite/lite-index.cc
@@ -0,0 +1,457 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/lite/lite-index.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/term-property-id.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-lite-index-header.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Point at which we declare the trie full.
+constexpr double kTrieFullFraction = 0.95;
+
+std::string MakeHitBufferFilename(const std::string& filename_base) {
+ return filename_base + "hb";
+}
+
+size_t header_size() { return sizeof(IcingLiteIndex_HeaderImpl::HeaderData); }
+
+} // namespace
+
+const LiteIndex::Element::Value LiteIndex::Element::kInvalidValue =
+ LiteIndex::Element(0, Hit()).value();
+
+libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> LiteIndex::Create(
+ const LiteIndex::Options& options, const IcingFilesystem* filesystem) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+
+ std::unique_ptr<LiteIndex> lite_index =
+ std::unique_ptr<LiteIndex>(new LiteIndex(options, filesystem));
+ ICING_RETURN_IF_ERROR(lite_index->Initialize());
+ return std::move(lite_index);
+}
+
+// size is max size in elements. An appropriate lexicon and display
+// mapping size will be chosen based on hit buffer size.
+LiteIndex::LiteIndex(const LiteIndex::Options& options,
+ const IcingFilesystem* filesystem)
+ : hit_buffer_(*filesystem),
+ hit_buffer_crc_(0),
+ lexicon_(options.filename_base + "lexicon", MakeTrieRuntimeOptions(),
+ filesystem),
+ header_mmap_(false, MAP_SHARED),
+ options_(options),
+ filesystem_(filesystem) {}
+
+LiteIndex::~LiteIndex() {
+ if (initialized()) {
+ libtextclassifier3::Status unused = PersistToDisk();
+ }
+}
+
+IcingDynamicTrie::RuntimeOptions LiteIndex::MakeTrieRuntimeOptions() {
+ return IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc);
+}
+
+libtextclassifier3::Status LiteIndex::Initialize() {
+ // Size of hit buffer's header struct, rounded up to the nearest number of
+ // system memory pages.
+ const size_t header_padded_size =
+ IcingMMapper::page_aligned_size(header_size());
+
+ // Variable declarations cannot cross goto jumps, so declare these up top.
+ libtextclassifier3::Status status;
+ uint64_t file_size;
+ IcingTimer timer;
+
+ if (!lexicon_.CreateIfNotExist(options_.lexicon_options) ||
+ !lexicon_.Init()) {
+ return absl_ports::InternalError("Failed to initialize lexicon trie");
+ }
+
+ hit_buffer_fd_.reset(filesystem_->OpenForWrite(
+ MakeHitBufferFilename(options_.filename_base).c_str()));
+ if (!hit_buffer_fd_.is_valid()) {
+ status = absl_ports::InternalError("Failed to open hit buffer file");
+ goto error;
+ }
+
+ file_size = filesystem_->GetFileSize(hit_buffer_fd_.get());
+ if (file_size == IcingFilesystem::kBadFileSize) {
+ status = absl_ports::InternalError("Failed to query hit buffer file size");
+ goto error;
+ }
+
+ if (file_size < header_padded_size) {
+ if (file_size != 0) {
+ status = absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Hit buffer had unexpected size %" PRIu64, file_size));
+ goto error;
+ }
+
+ ICING_VLOG(2) << "Creating new hit buffer";
+ // Make sure files are fresh.
+ if (!lexicon_.Remove() ||
+ !lexicon_.CreateIfNotExist(options_.lexicon_options) ||
+ !lexicon_.Init()) {
+ status =
+ absl_ports::InternalError("Failed to refresh lexicon during clear");
+ goto error;
+ }
+
+ // Create fresh hit buffer by first emptying the hit buffer file and then
+ // allocating header_padded_size of the cleared space.
+ if (!filesystem_->Truncate(hit_buffer_fd_.get(), 0) ||
+ !filesystem_->Truncate(hit_buffer_fd_.get(), header_padded_size)) {
+ status = absl_ports::InternalError("Failed to truncate hit buffer file");
+ goto error;
+ }
+
+ // Set up header.
+ header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_ = std::make_unique<IcingLiteIndex_HeaderImpl>(
+ reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>(
+ header_mmap_.address()));
+ header_->Reset();
+
+ if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
+ sizeof(Element::Value), header_->cur_size(),
+ options_.hit_buffer_size, &hit_buffer_crc_, true)) {
+ status = absl_ports::InternalError("Failed to initialize new hit buffer");
+ goto error;
+ }
+
+ UpdateChecksum();
+ } else {
+ header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_ = std::make_unique<IcingLiteIndex_HeaderImpl>(
+ reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>(
+ header_mmap_.address()));
+
+ if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
+ sizeof(Element::Value), header_->cur_size(),
+ options_.hit_buffer_size, &hit_buffer_crc_, true)) {
+ status = absl_ports::InternalError(
+ "Failed to re-initialize existing hit buffer");
+ goto error;
+ }
+
+ // Check integrity.
+ if (!header_->check_magic()) {
+ status = absl_ports::InternalError("Lite index header magic mismatch");
+ goto error;
+ }
+ Crc32 crc = ComputeChecksum();
+ if (crc.Get() != header_->lite_index_crc()) {
+ status = absl_ports::DataLossError(
+ IcingStringUtil::StringPrintf("Lite index crc check failed: %u vs %u",
+ crc.Get(), header_->lite_index_crc()));
+ goto error;
+ }
+ }
+
+ ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index init ok in %.3fms",
+ timer.Elapsed() * 1000);
+ return status;
+
+error:
+ header_ = nullptr;
+ header_mmap_.Unmap();
+ lexicon_.Close();
+ hit_buffer_crc_ = 0;
+ hit_buffer_.Reset();
+ hit_buffer_fd_.reset();
+ if (status.ok()) {
+ return absl_ports::InternalError(
+ "Error handling code ran but status was ok");
+ }
+ return status;
+}
+
+Crc32 LiteIndex::ComputeChecksum() {
+ IcingTimer timer;
+
+ // Update crcs.
+ uint32_t dependent_crcs[2];
+ hit_buffer_.UpdateCrc();
+ dependent_crcs[0] = hit_buffer_crc_;
+ dependent_crcs[1] = lexicon_.UpdateCrc();
+
+ // Compute the master crc.
+
+ // Header crc, excluding the actual crc field.
+ Crc32 all_crc(header_->CalculateHeaderCrc());
+ all_crc.Append(std::string_view(reinterpret_cast<const char*>(dependent_crcs),
+ sizeof(dependent_crcs)));
+ ICING_VLOG(2) << IcingStringUtil::StringPrintf(
+ "Lite index crc computed in %.3fms", timer.Elapsed() * 1000);
+
+ return all_crc;
+}
+
+libtextclassifier3::Status LiteIndex::Reset() {
+ IcingTimer timer;
+
+ // TODO(b/140436942): When these components have been changed to return errors
+ // they should be propagated from here.
+ lexicon_.Clear();
+ hit_buffer_.Clear();
+ header_->Reset();
+ UpdateChecksum();
+
+ ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index clear in %.3fms",
+ timer.Elapsed() * 1000);
+ return libtextclassifier3::Status::OK;
+}
+
+void LiteIndex::Warm() {
+ hit_buffer_.Warm();
+ lexicon_.Warm();
+}
+
+libtextclassifier3::Status LiteIndex::PersistToDisk() {
+ bool success = true;
+ if (!lexicon_.Sync()) {
+ ICING_VLOG(1) << "Failed to sync the lexicon.";
+ success = false;
+ }
+ hit_buffer_.Sync();
+ UpdateChecksum();
+ header_mmap_.Sync();
+
+ return (success) ? libtextclassifier3::Status::OK
+ : absl_ports::InternalError(
+ "Unable to sync lite index components.");
+}
+
+void LiteIndex::UpdateChecksum() {
+ header_->set_lite_index_crc(ComputeChecksum().Get());
+}
+
+libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
+ const std::string& term, TermMatchType::Code term_match_type,
+ NamespaceId namespace_id) {
+ uint32_t tvi;
+ if (!lexicon_.Insert(term.c_str(), "", &tvi, false)) {
+ return absl_ports::ResourceExhaustedError(
+ absl_ports::StrCat("Unable to add term ", term, " to lexicon!"));
+ }
+ ICING_RETURN_IF_ERROR(UpdateTermProperties(
+ tvi, term_match_type == TermMatchType::PREFIX, namespace_id));
+ return tvi;
+}
+
+libtextclassifier3::Status LiteIndex::UpdateTermProperties(
+ uint32_t tvi, bool hasPrefixHits, NamespaceId namespace_id) {
+ if (hasPrefixHits &&
+ !lexicon_.SetProperty(tvi, GetHasHitsInPrefixSectionPropertyId())) {
+ return absl_ports::ResourceExhaustedError(
+ "Insufficient disk space to create prefix property!");
+ }
+
+ if (!lexicon_.SetProperty(tvi, GetNamespacePropertyId(namespace_id))) {
+ return absl_ports::ResourceExhaustedError(
+ "Insufficient disk space to create namespace property!");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
+ if (is_full()) {
+ return absl_ports::ResourceExhaustedError("Hit buffer is full!");
+ }
+
+ header_->set_last_added_docid(hit.document_id());
+
+ Element elt(term_id, hit);
+ uint32_t cur_size = header_->cur_size();
+ Element::Value* valp = hit_buffer_.GetMutableMem<Element::Value>(cur_size, 1);
+ if (valp == nullptr) {
+ return absl_ports::ResourceExhaustedError(
+ "Allocating more space in hit buffer failed!");
+ }
+ *valp = elt.value();
+ header_->set_cur_size(cur_size + 1);
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<uint32_t> LiteIndex::FindTerm(
+ const std::string& term) const {
+ char dummy;
+ uint32_t tvi;
+ if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Could not find ", term, " in the lexicon."));
+ }
+ return tvi;
+}
+
+uint32_t LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ std::vector<DocHitInfo>* hits_out) {
+ uint32_t count = 0;
+ DocumentId last_document_id = kInvalidDocumentId;
+ for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) {
+ Element elt(hit_buffer_.array_cast<Element>()[idx]);
+ if (elt.term_id() != term_id) break;
+
+ const Hit& hit = elt.hit();
+ // Check sections.
+ if (((1u << hit.section_id()) & section_id_mask) == 0) {
+ continue;
+ }
+ // Check prefix section only.
+ if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
+ continue;
+ }
+ DocumentId document_id = hit.document_id();
+ if (document_id != last_document_id) {
+ count++;
+ if (hits_out != nullptr) {
+ hits_out->push_back(DocHitInfo(document_id));
+ }
+ last_document_id = document_id;
+ }
+ if (hits_out != nullptr) {
+ hits_out->back().UpdateSection(hit.section_id(), hit.score());
+ }
+ }
+ return count;
+}
+
+uint32_t LiteIndex::CountHits(uint32_t term_id) {
+ return AppendHits(term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ /*hits_out=*/nullptr);
+}
+
+bool LiteIndex::is_full() const {
+ return (header_->cur_size() == options_.hit_buffer_size ||
+ lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction));
+}
+
+void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const {
+ absl_ports::StrAppend(
+ out, IcingStringUtil::StringPrintf("Lite Index\nHit buffer %u/%u\n",
+ header_->cur_size(),
+ options_.hit_buffer_size));
+
+ // Lexicon.
+ out->append("Lexicon stats:\n");
+ lexicon_.GetDebugInfo(verbosity, out);
+}
+
+libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
+ int64_t header_and_hit_buffer_file_size =
+ filesystem_->GetFileSize(hit_buffer_fd_.get());
+
+ if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ "Failed to get element size of the LiteIndex's header and hit buffer");
+ }
+
+ int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
+ if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ "Failed to get element size of LiteIndex's lexicon");
+ }
+
+ // On initialization, we grow the file to a padded size first. So this size
+ // won't count towards the size taken up by elements
+ size_t header_padded_size = IcingMMapper::page_aligned_size(header_size());
+
+ return header_and_hit_buffer_file_size - header_padded_size +
+ lexicon_disk_usage;
+}
+
+uint32_t LiteIndex::Seek(uint32_t term_id) {
+ // Make searchable by sorting by hit buffer.
+ uint32_t sort_len = header_->cur_size() - header_->searchable_end();
+ if (sort_len > 0) {
+ IcingTimer timer;
+
+ auto* array_start =
+ hit_buffer_.GetMutableMem<Element::Value>(0, header_->cur_size());
+ Element::Value* sort_start = array_start + header_->searchable_end();
+ std::sort(sort_start, array_start + header_->cur_size());
+
+ // Now merge with previous region. Since the previous region is already
+ // sorted and deduplicated, optimize the merge by skipping everything less
+ // than the new region's smallest value.
+ if (header_->searchable_end() > 0) {
+ std::inplace_merge(array_start, array_start + header_->searchable_end(),
+ array_start + header_->cur_size());
+ }
+ ICING_VLOG(2) << IcingStringUtil::StringPrintf(
+ "Lite index sort and merge %u into %u in %.3fms", sort_len,
+ header_->searchable_end(), timer.Elapsed() * 1000);
+
+ // Now the entire array is sorted.
+ header_->set_searchable_end(header_->cur_size());
+
+ // Update crc in-line.
+ UpdateChecksum();
+ }
+
+ // Binary search for our term_id. Make sure we get the first
+ // element. Using kBeginSortValue ensures this for the hit value.
+ Element elt(term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kMaxHitScore));
+
+ const Element::Value* array = hit_buffer_.array_cast<Element::Value>();
+ const Element::Value* ptr =
+ std::lower_bound(array, array + header_->cur_size(), elt.value());
+ return ptr - array;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
new file mode 100644
index 0000000..b60a947
--- /dev/null
+++ b/icing/index/lite/lite-index.h
@@ -0,0 +1,269 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A small index with continuous updates (doesn't need explicit Flush
+// to persiste) but has more possibility for corruption. It can always
+// detect corruption reliably.
+
+#ifndef ICING_INDEX_LITE_INDEX_H_
+#define ICING_INDEX_LITE_INDEX_H_
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-lite-index-header.h"
+#include "icing/legacy/index/icing-lite-index-options.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/bit-util.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+class LiteIndex {
+ public:
+ // An entry in the hit buffer.
+ class Element {
+ public:
+ // Layout bits: 24 termid + 32 hit value + 8 hit score.
+ using Value = uint64_t;
+
+ static constexpr int kTermIdBits = 24;
+ static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
+ static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8;
+
+ static const Value kInvalidValue;
+
+ explicit Element(Value v = kInvalidValue) : value_(v) {}
+
+ Element(uint32_t term_id, const Hit& hit) {
+ static_assert(
+ kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8,
+ "LiteIndexElementTooBig");
+
+ value_ = 0;
+ // Term id goes into the most significant bits because it takes
+ // precedent in sorts.
+ bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits,
+ &value_);
+ bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_);
+ bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_);
+ }
+
+ uint32_t term_id() const {
+ return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits,
+ kTermIdBits);
+ }
+
+ Hit hit() const {
+ return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits),
+ bit_util::BitfieldGet(value_, 0, kHitScoreBits));
+ }
+
+ Value value() const { return value_; }
+
+ private:
+ Value value_;
+ };
+
+ using Options = IcingLiteIndexOptions;
+
+ // Updates checksum of subcomponents.
+ ~LiteIndex();
+
+ // Creates lite index from storage. The files will be created if they do not
+ // already exist.
+ //
+ // Returns:
+ // OK on success
+ // DATA_LOSS if the index was corrupted and cleared
+ // INTERNAL on I/O error
+ static libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> Create(
+ const Options& options, const IcingFilesystem* filesystem);
+
+ // Resets all internal members of the index. Returns OK if all operations were
+ // successful.
+ libtextclassifier3::Status Reset();
+
+ // Advises the OS to cache pages in the index, which will be accessed for a
+ // query soon.
+ void Warm();
+
+ // Syncs all modified files in the index to disk.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL on I/O error
+ libtextclassifier3::Status PersistToDisk();
+
+ // Calculate the checksum of all sub-components of the LiteIndex
+ Crc32 ComputeChecksum();
+
+ // Returns term_id if term found, NOT_FOUND otherwise.
+ libtextclassifier3::StatusOr<uint32_t> FindTerm(
+ const std::string& term) const;
+
+ // Returns an iterator for all terms for which 'prefix' is a prefix.
+ class PrefixIterator {
+ public:
+ explicit PrefixIterator(const IcingDynamicTrie::Iterator& delegate)
+ : delegate_(delegate) {}
+ bool IsValid() const { return delegate_.IsValid(); }
+
+ void Advance() { delegate_.Advance(); }
+
+ const char* GetKey() const { return delegate_.GetKey(); }
+
+ uint32_t GetValueIndex() const { return delegate_.GetValueIndex(); }
+
+ private:
+ IcingDynamicTrie::Iterator delegate_;
+ };
+
+ PrefixIterator FindTermPrefixes(const std::string& prefix) const {
+ return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str()));
+ }
+
+ // Inserts a term with its properties.
+ //
+ // Returns:
+ // A value index on success
+ // RESOURCE_EXHAUSTED if lexicon is full or no disk space is available
+ libtextclassifier3::StatusOr<uint32_t> InsertTerm(
+ const std::string& term, TermMatchType::Code term_match_type,
+ NamespaceId namespace_id);
+
+ // Updates term properties by setting hasPrefixHits and namespace id of the
+ // term.
+ //
+ // Returns:
+ // OK on success
+ // RESOURCE_EXHAUSTED if no disk space is available
+ libtextclassifier3::Status UpdateTermProperties(uint32_t tvi,
+ bool hasPrefixHits,
+ NamespaceId namespace_id);
+
+ // Append hit to buffer. term_id must be encoded using the same term_id_codec
+ // supplied to the index constructor. Returns non-OK if hit cannot be added
+ // (either due to hit buffer or file system capacity reached).
+ libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit);
+
+ // Add all hits with term_id from the sections specified in section_id_mask,
+ // skipping hits in non-prefix sections if only_from_prefix_sections is true,
+ // to hits_out.
+ uint32_t AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ std::vector<DocHitInfo>* hits_out);
+
+ // Returns the hit count of the term.
+ uint32_t CountHits(uint32_t term_id);
+
+ // Check if buffer has reached its capacity.
+ bool is_full() const;
+
+ constexpr static uint32_t max_hit_buffer_size() {
+ return std::numeric_limits<uint32_t>::max() / sizeof(LiteIndex::Element);
+ }
+
+ // We keep track of the last added document_id. This is always the largest
+ // document_id that has been added because hits can only be added in order of
+ // increasing document_id.
+ DocumentId last_added_document_id() const {
+ return header_->last_added_docid();
+ }
+
+ const IcingDynamicTrie& lexicon() const { return lexicon_; }
+
+ // Returns debug information for the index in out.
+ // verbosity <= 0, simplest debug information - size of lexicon, hit buffer
+ // verbosity > 0, more detailed debug information from the lexicon.
+ void GetDebugInfo(int verbosity, std::string* out) const;
+
+ // Returns the byte size of all the elements held in the index. This excludes
+ // the size of any internal metadata of the index, e.g. the index's header.
+ //
+ // Returns:
+ // Byte size on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+
+ private:
+ static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
+
+ LiteIndex(const Options& options, const IcingFilesystem* filesystem);
+
+ // Initializes lite index from storage. Must be called exactly once after
+ // object construction.
+ //
+ // Returns:
+ // OK on success
+ // DATA_LOSS if the index was corrupted and cleared
+ // INTERNAL on I/O error
+ libtextclassifier3::Status Initialize();
+
+ bool initialized() const { return header_ != nullptr; }
+
+ // Sets the computed checksum in the header
+ void UpdateChecksum();
+
+ // Returns the position of the first element with term_id, or the size of the
+ // hit buffer if term_id is not present.
+ uint32_t Seek(uint32_t term_id);
+
+ // File descriptor that points to where the header and hit buffer are written
+ // to.
+ ScopedFd hit_buffer_fd_;
+
+ // Mmapped region past the header that stores the hits.
+ IcingArrayStorage hit_buffer_;
+
+ // Crc checksum of the hits, excludes the header.
+ uint32_t hit_buffer_crc_;
+
+ // Trie that maps indexed terms to their term id
+ IcingDynamicTrie lexicon_;
+
+ // TODO(b/140437260): Port over to MemoryMappedFile
+ // Memory mapped region of the underlying file that reflects the header.
+ IcingMMapper header_mmap_;
+
+ // Wrapper around the mmapped header that contains stats on the lite index.
+ std::unique_ptr<IcingLiteIndex_Header> header_;
+
+ // Options used to initialize the LiteIndex.
+ const Options options_;
+
+ // TODO(b/139087650) Move to icing::Filesystem
+ const IcingFilesystem* const filesystem_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_LITE_INDEX_H_