aboutsummaryrefslogtreecommitdiff
path: root/icing
diff options
context:
space:
mode:
authorCassie Wang <cassiewang@google.com>2020-07-21 14:26:01 -0700
committerCassie Wang <cassiewang@google.com>2020-07-24 14:19:26 -0700
commitc994b6ea30c9be8976da0b1bf6a8923907ff903f (patch)
treef69970caf14ff26cfea1f3a573c2c27a7cd83e3d /icing
parent09d66401215254a2bdfc134009039636054d28d2 (diff)
downloadicing-c994b6ea30c9be8976da0b1bf6a8923907ff903f.tar.gz
Pull upstream changes.
Change-Id: Ieed20fd00a7c00778045434ae1b7c9e019a6c369
Diffstat (limited to 'icing')
-rw-r--r--icing/file/file-backed-vector.h12
-rw-r--r--icing/file/file-backed-vector_test.cc48
-rw-r--r--icing/file/memory-mapped-file.cc28
-rw-r--r--icing/file/memory-mapped-file.h14
-rw-r--r--icing/index/index.cc2
-rw-r--r--icing/index/index.h2
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-term.h2
-rw-r--r--icing/index/lite/lite-index.cc (renamed from icing/index/lite-index.cc)2
-rw-r--r--icing/index/lite/lite-index.h (renamed from icing/index/lite-index.h)0
-rw-r--r--icing/index/main/index-block.cc201
-rw-r--r--icing/index/main/index-block.h215
-rw-r--r--icing/index/main/index-block_test.cc354
-rw-r--r--icing/index/main/posting-list-free.h (renamed from icing/index/posting-list-free.h)10
-rw-r--r--icing/index/main/posting-list-free_test.cc (renamed from icing/index/posting-list-free_test.cc)4
-rw-r--r--icing/index/main/posting-list-used.cc (renamed from icing/index/posting-list-used.cc)210
-rw-r--r--icing/index/main/posting-list-used.h (renamed from icing/index/posting-list-used.h)94
-rw-r--r--icing/index/main/posting-list-used_test.cc (renamed from icing/index/posting-list-used_test.cc)309
-rw-r--r--icing/index/main/posting-list-utils.cc (renamed from icing/index/posting-list-utils.cc)2
-rw-r--r--icing/index/main/posting-list-utils.h (renamed from icing/index/posting-list-utils.h)9
-rw-r--r--icing/jni/icing-search-engine-jni.cc12
-rw-r--r--icing/legacy/index/icing-dynamic-trie.h1
-rw-r--r--icing/legacy/index/icing-dynamic-trie_test.cc921
-rw-r--r--icing/legacy/index/icing-filesystem.h8
-rw-r--r--icing/legacy/index/icing-mmapper.h4
-rw-r--r--icing/legacy/index/icing-storage.h1
-rw-r--r--icing/testing/hit-test-utils.cc58
-rw-r--r--icing/testing/hit-test-utils.h43
-rw-r--r--icing/text_classifier/lib3/utils/base/logging.h1
-rw-r--r--icing/text_classifier/lib3/utils/base/statusor.h72
29 files changed, 2395 insertions, 244 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 27d03b2..e4ec0cd 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -181,7 +181,9 @@ class FileBackedVector {
// OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size
libtextclassifier3::Status Set(int32_t idx, const T& value);
- // Resizes to first len elements. The crc is not updated on truncation.
+ // Resizes to first len elements. The crc is cleared on truncation and will be
+ // updated on destruction, or once the client calls ComputeChecksum() or
+ // PersistToDisk().
//
// Returns:
// OUT_OF_RANGE_ERROR if len < 0 or >= num_elements()
@@ -577,6 +579,13 @@ libtextclassifier3::Status FileBackedVector<T>::TruncateTo(
new_num_elements, header_->num_elements));
}
+ ICING_VLOG(2)
+ << "FileBackedVector truncating, need to recalculate entire checksum";
+ changes_.clear();
+ saved_original_buffer_.clear();
+ changes_end_ = 0;
+ header_->vector_checksum = 0;
+
header_->num_elements = new_num_elements;
return libtextclassifier3::Status::OK;
}
@@ -653,6 +662,7 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
}
cur_offset += sizeof(T);
}
+
if (!changes_.empty()) {
ICING_VLOG(2) << IcingStringUtil::StringPrintf(
"Array update partial crcs %d truncated %d overlapped %d duplicate %d",
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index 7561b57..6c3b931 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -158,8 +158,8 @@ TEST_F(FileBackedVectorTest, SimpleShared) {
// Truncate the content
ICING_EXPECT_OK(vector->TruncateTo(0));
- // We don't automatically update the crc when we truncate.
- EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));
+ // Crc is cleared after truncation and reset to 0.
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
EXPECT_EQ(0u, vector->num_elements());
}
@@ -409,10 +409,10 @@ TEST_F(FileBackedVectorTest, TruncateTo) {
EXPECT_EQ(1, vector->num_elements());
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));
- // Truncating doesn't cause the checksum to be updated.
+ // Truncating clears the checksum and resets it to 0
ICING_EXPECT_OK(vector->TruncateTo(0));
EXPECT_EQ(0, vector->num_elements());
- EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
// Can't truncate past end.
EXPECT_THAT(vector->TruncateTo(100),
@@ -423,6 +423,46 @@ TEST_F(FileBackedVectorTest, TruncateTo) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
+TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<float>> vector,
+ FileBackedVector<float>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ ICING_ASSERT_OK(vector->Set(0, 1.0));
+ ICING_ASSERT_OK(vector->Set(1, 2.0));
+ ICING_ASSERT_OK(vector->Set(2, 2.0));
+ ICING_ASSERT_OK(vector->Set(3, 2.0));
+ ICING_ASSERT_OK(vector->Set(4, 2.0));
+ } // Destroying the vector should trigger a checksum of the 5 elements
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<float>> vector,
+ FileBackedVector<float>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_EQ(5, vector->num_elements());
+ ICING_EXPECT_OK(vector->TruncateTo(4));
+ EXPECT_EQ(4, vector->num_elements());
+ } // Destroying the vector should update the checksum to 4 elements
+
+ // Creating again should double check that our checksum of 4 elements matches
+ // what was previously saved.
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<float>> vector,
+ FileBackedVector<float>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_EQ(vector->num_elements(), 4);
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc
index 34365a9..bda01f2 100644
--- a/icing/file/memory-mapped-file.cc
+++ b/icing/file/memory-mapped-file.cc
@@ -37,6 +37,23 @@ MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem,
file_path_(file_path),
strategy_(mmap_strategy) {}
+MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other)
+ // Make sure that mmap_result_ is a nullptr before we call Swap. We don't
+ // care what values the remaining members hold before we swap into other,
+ // but if mmap_result_ holds a non-NULL value before we initialized anything
+ // then other will try to free memory at that address when it's destroyed!
+ : mmap_result_(nullptr) {
+ Swap(&other);
+}
+
+MemoryMappedFile& MemoryMappedFile::operator=(MemoryMappedFile&& other) {
+ // Swap all of our elements with other. This will ensure that both this now
+ // holds other's previous resources and that this's previous resources will be
+ // properly freed when other is destructed at the end of this function.
+ Swap(&other);
+ return *this;
+}
+
MemoryMappedFile::~MemoryMappedFile() { Unmap(); }
void MemoryMappedFile::MemoryMappedFile::Unmap() {
@@ -167,5 +184,16 @@ libtextclassifier3::Status MemoryMappedFile::OptimizeFor(
return libtextclassifier3::Status::OK;
}
+void MemoryMappedFile::Swap(MemoryMappedFile* other) {
+ std::swap(filesystem_, other->filesystem_);
+ std::swap(file_path_, other->file_path_);
+ std::swap(strategy_, other->strategy_);
+ std::swap(file_offset_, other->file_offset_);
+ std::swap(region_, other->region_);
+ std::swap(region_size_, other->region_size_);
+ std::swap(adjusted_mmap_size_, other->adjusted_mmap_size_);
+ std::swap(mmap_result_, other->mmap_result_);
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/file/memory-mapped-file.h b/icing/file/memory-mapped-file.h
index 5a52368..1d31a42 100644
--- a/icing/file/memory-mapped-file.h
+++ b/icing/file/memory-mapped-file.h
@@ -79,7 +79,10 @@ class MemoryMappedFile {
// file_path : Full path of the file that needs to be memory-mapped.
MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path,
Strategy mmap_strategy);
-
+ MemoryMappedFile(const MemoryMappedFile& other) = delete;
+ MemoryMappedFile(MemoryMappedFile&& other);
+ MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete;
+ MemoryMappedFile& operator=(MemoryMappedFile&& other);
// Frees any region that is still memory-mapped region.
~MemoryMappedFile();
@@ -134,10 +137,13 @@ class MemoryMappedFile {
Strategy strategy() const { return strategy_; }
private:
+ // Swaps the contents of this with other.
+ void Swap(MemoryMappedFile* other);
+
// Cached constructor params.
- const Filesystem* const filesystem_;
- const std::string file_path_;
- const Strategy strategy_;
+ const Filesystem* filesystem_;
+ std::string file_path_;
+ Strategy strategy_;
// Offset within the file at which the current memory-mapped region starts.
size_t file_offset_ = 0;
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 927acaf..d4a2508 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -26,7 +26,7 @@
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator-term.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/lite-index.h"
+#include "icing/index/lite/lite-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/core/icing-string-util.h"
diff --git a/icing/index/index.h b/icing/index/index.h
index f30c8ad..d8e409c 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -25,7 +25,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/lite-index.h"
+#include "icing/index/lite/lite-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
diff --git a/icing/index/iterator/doc-hit-info-iterator-term.h b/icing/index/iterator/doc-hit-info-iterator-term.h
index 7d02fc2..21d1dd6 100644
--- a/icing/index/iterator/doc-hit-info-iterator-term.h
+++ b/icing/index/iterator/doc-hit-info-iterator-term.h
@@ -21,7 +21,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/lite-index.h"
+#include "icing/index/lite/lite-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/schema/section.h"
diff --git a/icing/index/lite-index.cc b/icing/index/lite/lite-index.cc
index 489c53d..a72402e 100644
--- a/icing/index/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/lite-index.h"
+#include "icing/index/lite/lite-index.h"
#include <inttypes.h>
#include <stddef.h>
diff --git a/icing/index/lite-index.h b/icing/index/lite/lite-index.h
index b60a947..b60a947 100644
--- a/icing/index/lite-index.h
+++ b/icing/index/lite/lite-index.h
diff --git a/icing/index/main/index-block.cc b/icing/index/main/index-block.cc
new file mode 100644
index 0000000..9d7df3c
--- /dev/null
+++ b/icing/index/main/index-block.cc
@@ -0,0 +1,201 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/index-block.h"
+
+#include <inttypes.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/main/posting-list-free.h"
+#include "icing/index/main/posting-list-utils.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/math-util.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+libtextclassifier3::Status ValidatePostingListBytes(uint32_t posting_list_bytes,
+ uint32_t block_size) {
+ if (posting_list_bytes >
+ IndexBlock::CalculateMaxPostingListBytes(block_size) ||
+ !posting_list_utils::IsValidPostingListSize(posting_list_bytes)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Requested posting list size %d is illegal for a flash block with max "
+ "posting list size of %d",
+ posting_list_bytes,
+ IndexBlock::CalculateMaxPostingListBytes(block_size)));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace
+
+uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock(
+ uint32_t block_size, int posting_list_index_bits) {
+ // Assume 50% compressed and most don't have scores.
+ uint32_t bytes_per_hit = sizeof(Hit::Value) / 2;
+ return (block_size - sizeof(BlockHeader)) /
+ ((1u << posting_list_index_bits) * bytes_per_hit);
+}
+
+libtextclassifier3::StatusOr<IndexBlock>
+IndexBlock::CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
+ std::string_view file_path,
+ off_t offset,
+ uint32_t block_size) {
+ if (block_size < sizeof(BlockHeader)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Provided block_size %d is too small to fit even the BlockHeader!",
+ block_size));
+ }
+ MemoryMappedFile mmapped_file(
+ filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
+ IndexBlock block(std::move(mmapped_file));
+ ICING_RETURN_IF_ERROR(
+ ValidatePostingListBytes(block.get_posting_list_bytes(), block_size));
+ return block;
+}
+
+libtextclassifier3::StatusOr<IndexBlock>
+IndexBlock::CreateFromUninitializedRegion(const Filesystem& filesystem,
+ std::string_view file_path,
+ off_t offset, uint32_t block_size,
+ uint32_t posting_list_bytes) {
+ if (block_size < sizeof(BlockHeader)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Provided block_size %d is too small to fit even the BlockHeader!",
+ block_size));
+ }
+ ICING_RETURN_IF_ERROR(
+ ValidatePostingListBytes(posting_list_bytes, block_size));
+ MemoryMappedFile mmapped_file(
+ filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
+ IndexBlock block(std::move(mmapped_file));
+ // Safe to ignore the return value of Reset. Reset returns an error if
+ // posting_list_bytes is invalid, but this function ensures that
+ // posting_list_bytes is valid thanks to the call to ValidatePostingListBytes
+ // above.
+ block.Reset(posting_list_bytes);
+ return block;
+}
+
+IndexBlock::IndexBlock(MemoryMappedFile mmapped_block)
+ : header_(reinterpret_cast<BlockHeader*>(mmapped_block.mutable_region())),
+ posting_lists_start_ptr_(mmapped_block.mutable_region() +
+ sizeof(BlockHeader)),
+ block_size_in_bytes_(mmapped_block.region_size()),
+ mmapped_block_(std::move(mmapped_block)) {}
+
+libtextclassifier3::Status IndexBlock::Reset(int posting_list_bytes) {
+ ICING_RETURN_IF_ERROR(ValidatePostingListBytes(posting_list_bytes,
+ mmapped_block_.region_size()));
+ header_->free_list_posting_list_index = kInvalidPostingListIndex;
+ header_->next_block_index = kInvalidBlockIndex;
+ header_->posting_list_bytes = posting_list_bytes;
+
+ // Starting with the last posting list, prepend each posting list to the free
+ // list. At the end, the beginning of the free list should be the first
+ // posting list.
+ for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
+ posting_list_index >= 0; --posting_list_index) {
+ // Adding the posting list at posting_list_index to the free list will
+ // modify both the posting list and also
+ // header_->free_list_posting_list_index.
+ FreePostingList(posting_list_index);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<PostingListUsed>
+IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Cannot get posting list with index %d in IndexBlock with only %d "
+ "posting lists.",
+ posting_list_index, max_num_posting_lists()));
+ }
+ return PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
+ get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
+}
+
+libtextclassifier3::StatusOr<PostingListIndex>
+IndexBlock::AllocatePostingList() {
+ if (!has_free_posting_lists()) {
+ return absl_ports::ResourceExhaustedError(
+ "No available posting lists to allocate.");
+ }
+
+ // Pull one off the free list.
+ PostingListIndex posting_list_index = header_->free_list_posting_list_index;
+
+ // We know at this point that posting_list_bytes will return a valid pl size
+ // (because an already initialized IndexBlock instance can't have an invalid
+ // posting_list_bytes). So CreateFromPreexistingPostingListFreeRegion will
+ // always return OK and ValueOrDie is safe to call.
+ auto posting_list_or =
+ PostingListFree::CreateFromPreexistingPostingListFreeRegion(
+ get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
+ PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
+
+ header_->free_list_posting_list_index = plfree.get_next_posting_list_index();
+ if (header_->free_list_posting_list_index != kInvalidPostingListIndex &&
+ header_->free_list_posting_list_index >= max_num_posting_lists()) {
+ ICING_LOG(ERROR)
+ << "Free Posting List points to an invalid posting list index!";
+ header_->free_list_posting_list_index = kInvalidPostingListIndex;
+ }
+
+ // Make it a used posting list.
+ PostingListUsed::CreateFromUnitializedRegion(
+ get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
+ return posting_list_index;
+}
+
+void IndexBlock::FreePostingList(PostingListIndex posting_list_index) {
+ if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
+ ICING_LOG(ERROR) << "Cannot free posting list with index "
+ << posting_list_index << " in IndexBlock with only "
+ << max_num_posting_lists() << " posting lists.";
+ return;
+ }
+
+ // We know at this point that posting_list_bytes will return a valid pl size.
+ // So CreateFromUninitializedRegion will always return OK and ValueOrDie is
+ // safe to call.
+ auto posting_list_or = PostingListFree::CreateFromUnitializedRegion(
+ get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
+ PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
+
+ // Put at the head of the list.
+ plfree.set_next_posting_list_index(header_->free_list_posting_list_index);
+ header_->free_list_posting_list_index = posting_list_index;
+}
+
+char* IndexBlock::get_posting_list_ptr(PostingListIndex posting_list_index) {
+ return posting_lists_start_ptr_ +
+ get_posting_list_bytes() * posting_list_index;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/main/index-block.h b/icing/index/main/index-block.h
new file mode 100644
index 0000000..1d17e34
--- /dev/null
+++ b/icing/index/main/index-block.h
@@ -0,0 +1,215 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_MAIN_INDEX_BLOCK_H_
+#define ICING_INDEX_MAIN_INDEX_BLOCK_H_
+
+#include <string.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/main/posting-list-free.h"
+#include "icing/index/main/posting-list-used.h"
+#include "icing/legacy/index/icing-bit-util.h"
+
+namespace icing {
+namespace lib {
+
+inline constexpr uint32_t kInvalidBlockIndex = 0;
+
+// This class is used to manage I/O to a single flash block and to manage the
+// division of that flash block into PostingLists. It provides an interface to
+// allocate, free and read posting lists.
+//
+// An IndexBlock contains a small header and an array of fixed-size posting list
+// buffers. Initially, all posting lists are chained in a singly-linked free
+// list.
+//
+// When we want to get a new PostingList from an IndexBlock, we just
+// pull one off the free list. When the user wants to return the
+// PostingList to the free pool, we prepend it to the free list.
+class IndexBlock {
+ public:
+ // What is the maximum posting list size in bytes that can be stored in this
+ // block.
+ static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes) {
+ return (block_size_in_bytes - sizeof(BlockHeader)) / sizeof(Hit) *
+ sizeof(Hit);
+ }
+
+ // For a given min number of bits needed to store PostingListIndex for a
+ // block of "block_size", return the approximate number of hits that a full
+ // posting list in this block could accomodate.
+ static uint32_t ApproximateFullPostingListHitsForBlock(
+ uint32_t block_size, int posting_list_index_bits);
+
+ // Create an IndexBlock to reference the previously used region of the
+ // mmapped_file starting at offset with size block_size
+ //
+ // RETURNS:
+ // - a valid IndexBlock on success
+ // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
+ // if the posting list size stored in the region is not a valid posting
+ // list size or it exceeds max_posting_list_bytes(size).
+ // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
+ static libtextclassifier3::StatusOr<IndexBlock>
+ CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
+ std::string_view file_path,
+ off_t offset, uint32_t block_size);
+
+ // Create an IndexBlock to reference an uninitialized region of the
+ // mmapped_file starting at offset with size block_size. The IndexBlock will
+ // initialize the region to be an empty IndexBlock with posting lists of size
+ // posting_list_bytes.
+ //
+ // RETURNS:
+ // - a valid IndexBlock on success
+ // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
+ // if posting_list_bytes is not a valid posting list size or it exceeds
+ // max_posting_list_bytes(size).
+ // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
+ static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion(
+ const Filesystem& filesystem, std::string_view file_path, off_t offset,
+ uint32_t block_size, uint32_t posting_list_bytes);
+
+ IndexBlock(const IndexBlock&) = delete;
+ IndexBlock& operator=(const IndexBlock&) = delete;
+ IndexBlock(IndexBlock&&) = default;
+ IndexBlock& operator=(IndexBlock&&) = default;
+
+ // Instantiate a PostingListUsed at posting_list_index with the existing
+ // content in the IndexBlock.
+ //
+ // RETURNS:
+ // - a valid PostingListUsed on success
+ // - INVALID_ARGUMENT if posting_list_index >= max_num_posting_lists()
+ libtextclassifier3::StatusOr<PostingListUsed> GetAllocatedPostingList(
+ PostingListIndex posting_list_index);
+
+ // Allocates a PostingListUsed in the IndexBlock, if possible.
+ //
+ // RETURNS:
+ // - a valid PostingListIndex that can be used to retrieve the allocated
+ // PostingListUsed via a call to GetAllocatedPostingList
+ // - RESOURCE_EXHAUSTED if !has_free_posting_lists()
+ libtextclassifier3::StatusOr<PostingListIndex> AllocatePostingList();
+
+ // Free posting list at posting_list_index.
+ //
+ // It is considered an error to "double-free" a posting list. You should never
+ // call FreePostingList(index) with the same index twice, unless that index
+ // was returned by an intervening AllocatePostingList() call.
+ //
+ // Ex.
+ // PostingListIndex index = block.AllocatePostingList();
+ // DoSomething(block.GetAllocatedPostingList(index));
+ // block.FreePostingList(index);
+ // block.FreePostingList(index); // Argh! What are you doing?!
+ // ...
+ // PostingListIndex index = block.AllocatePostingList();
+ // DoSomething(block.GetAllocatedPostingList(index));
+ // block.FreePostingList(index);
+ // index = block.AllocatePostingList();
+ // DoSomethingElse(block.GetAllocatedPostingList(index));
+ // // A-Ok! We called AllocatePostingList() since the last FreePostingList()
+ // call. block.FreePostingList(index);
+ //
+ // Has no effect if posting_list_index >= max_num_posting_lists().
+ void FreePostingList(PostingListIndex posting_list_index);
+
+ // Blocks can be chained. The interpretation of the chaining is up
+ // to the caller.
+ uint32_t next_block_index() const { return header_->next_block_index; }
+
+ void set_next_block_index(uint32_t next_block_index) {
+ header_->next_block_index = next_block_index;
+ }
+
+ // Retrieves the size (in bytes) of the posting lists in this IndexBlock.
+ uint32_t get_posting_list_bytes() const {
+ return header_->posting_list_bytes;
+ }
+
+ // Maximum number of posting lists in the block.
+ uint32_t max_num_posting_lists() const {
+ return total_posting_lists_bytes() / get_posting_list_bytes();
+ }
+
+ // Number of bits required to store the largest PostingListIndex in this
+ // block.
+ int posting_list_index_bits() const {
+ return BitsToStore(max_num_posting_lists());
+ }
+
+ // Returns whether or not there are available posting lists in the free list.
+ bool has_free_posting_lists() const {
+ return header_->free_list_posting_list_index != kInvalidPostingListIndex;
+ }
+
+ private:
+ // Assumes that mmapped_file already has established a valid mapping to the
+ // requested block.
+ IndexBlock(MemoryMappedFile mmapped_block);
+
+ // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
+ // all posting lists to the free list.
+ //
+ // RETURNS:
+ // - OK, on success
+ // - INVALID_ARGUMENT if posting_list_bytes is a valid posting list size.
+ libtextclassifier3::Status Reset(int posting_list_bytes);
+
+ char* get_posting_list_ptr(PostingListIndex posting_list_index);
+
+ // Bytes in the block available for posting lists (minus header,
+ // alignment, etc.).
+ uint32_t total_posting_lists_bytes() const {
+ return block_size_in_bytes_ - sizeof(BlockHeader);
+ }
+
+ struct BlockHeader {
+ // Index of the next block if this block is being chained or part of a free
+ // list.
+ uint32_t next_block_index;
+
+ // Index to the first PostingListFree in the IndexBlock. This is the start
+ // of the free list.
+ PostingListIndex free_list_posting_list_index;
+
+ // The size of each posting list in the IndexBlock.
+ uint32_t posting_list_bytes;
+ };
+ // Pointer to the header of this block. The header is used to store info about
+ // this block and its posting lists.
+ BlockHeader* header_;
+ // Pointer to the beginning of the posting lists region - the area the block
+ // after the header.
+ char* posting_lists_start_ptr_;
+ uint32_t block_size_in_bytes_;
+
+ // MemoryMappedFile used to interact with the underlying flash block.
+ MemoryMappedFile mmapped_block_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_MAIN_INDEX_BLOCK_H_
diff --git a/icing/index/main/index-block_test.cc b/icing/index/main/index-block_test.cc
new file mode 100644
index 0000000..08ba57d
--- /dev/null
+++ b/icing/index/main/index-block_test.cc
@@ -0,0 +1,354 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/main/index-block.h"
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/index/main/posting-list-used.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+static constexpr int kBlockSize = 4096;
+
+bool CreateFileWithSize(const Filesystem& filesystem, const std::string& file,
+ int size) {
+ size_t parent_dir_end = file.find_last_of('/');
+ if (parent_dir_end == std::string::npos) {
+ return false;
+ }
+ std::string file_dir = file.substr(0, parent_dir_end);
+ return filesystem.CreateDirectoryRecursively(file_dir.c_str()) &&
+ filesystem.Grow(file.c_str(), size);
+}
+
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+
+TEST(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
+ constexpr int kPostingListBytes = 20;
+
+ Filesystem filesystem;
+ std::string flash_file = GetTestTempDir() + "/flash/0";
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
+
+ {
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize,
+ kPostingListBytes));
+ EXPECT_TRUE(block.has_free_posting_lists());
+ }
+}
+
+TEST(IndexBlockTest, SizeAccessorsWorkCorrectly) {
+ constexpr int kPostingListBytes1 = 20;
+
+ Filesystem filesystem;
+ std::string flash_file = GetTestTempDir() + "/flash/0";
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
+
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize,
+ kPostingListBytes1));
+ EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes1));
+ // There should be (4096 - 12) / 20 = 204 posting lists
+ // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 203 in only 8
+ // bits.
+ EXPECT_THAT(block.max_num_posting_lists(), Eq(204));
+ EXPECT_THAT(block.posting_list_index_bits(), Eq(8));
+
+ constexpr int kPostingListBytes2 = 200;
+
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0,
+ kBlockSize, kPostingListBytes2));
+ EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes2));
+ // There should be (4096 - 12) / 200 = 20 posting lists
+ // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5
+ // bits.
+ EXPECT_THAT(block.max_num_posting_lists(), Eq(20));
+ EXPECT_THAT(block.posting_list_index_bits(), Eq(5));
+}
+
+TEST(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
+ constexpr int kPostingListBytes = 2000;
+
+ Filesystem filesystem;
+ std::string flash_file = GetTestTempDir() + "/flash/0";
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
+
+ std::vector<Hit> test_hits{
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ };
+ PostingListIndex allocated_index;
+ {
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file,
+ /*offset=*/0,
+ /*block_size=*/kBlockSize, kPostingListBytes));
+ // Add hits to the first posting list.
+ ICING_ASSERT_OK_AND_ASSIGN(allocated_index, block.AllocatePostingList());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
+ block.GetAllocatedPostingList(allocated_index));
+ for (const Hit& hit : test_hits) {
+ ICING_ASSERT_OK(pl_used.PrependHit(hit));
+ }
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(
+ test_hits.rbegin(), test_hits.rend())));
+ }
+ {
+ // Create an IndexBlock from the previously allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block,
+ IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize));
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
+ block.GetAllocatedPostingList(allocated_index));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(
+ test_hits.rbegin(), test_hits.rend())));
+ EXPECT_TRUE(block.has_free_posting_lists());
+ }
+}
+
+TEST(IndexBlockTest, IndexBlockMultiplePostingLists) {
+ constexpr int kPostingListBytes = 2000;
+
+ Filesystem filesystem;
+ std::string flash_file = GetTestTempDir() + "/flash/0";
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
+
+ std::vector<Hit> hits_in_posting_list1{
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ };
+ std::vector<Hit> hits_in_posting_list2{
+ Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore),
+ };
+ PostingListIndex allocated_index_1;
+ PostingListIndex allocated_index_2;
+ {
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize,
+ kPostingListBytes));
+
+ // Add hits to the first posting list.
+ ICING_ASSERT_OK_AND_ASSIGN(allocated_index_1, block.AllocatePostingList());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used_1,
+ block.GetAllocatedPostingList(allocated_index_1));
+ for (const Hit& hit : hits_in_posting_list1) {
+ ICING_ASSERT_OK(pl_used_1.PrependHit(hit));
+ }
+ EXPECT_THAT(pl_used_1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
+ hits_in_posting_list1.rend())));
+
+ // Add hits to the second posting list.
+ ICING_ASSERT_OK_AND_ASSIGN(allocated_index_2, block.AllocatePostingList());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used_2,
+ block.GetAllocatedPostingList(allocated_index_2));
+ for (const Hit& hit : hits_in_posting_list2) {
+ ICING_ASSERT_OK(pl_used_2.PrependHit(hit));
+ }
+ EXPECT_THAT(pl_used_2.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
+ hits_in_posting_list2.rend())));
+
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_FALSE(block.has_free_posting_lists());
+ }
+ {
+ // Create an IndexBlock from the previously allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block,
+ IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used_1,
+ block.GetAllocatedPostingList(allocated_index_1));
+ EXPECT_THAT(pl_used_1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
+ hits_in_posting_list1.rend())));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PostingListUsed pl_used_2,
+ block.GetAllocatedPostingList(allocated_index_2));
+ EXPECT_THAT(pl_used_2.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
+ hits_in_posting_list2.rend())));
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_FALSE(block.has_free_posting_lists());
+ }
+}
+
+TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
+ constexpr int kPostingListBytes = 2000;
+
+ Filesystem filesystem;
+ std::string flash_file = GetTestTempDir() + "/flash/0";
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
+
+ // Create an IndexBlock from this newly allocated file block.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block,
+ IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize, kPostingListBytes));
+
+ // Add hits to the first posting list.
+ std::vector<Hit> hits_in_posting_list1{
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ };
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1,
+ block.AllocatePostingList());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_1,
+ block.GetAllocatedPostingList(allocated_index_1));
+ for (const Hit& hit : hits_in_posting_list1) {
+ ICING_ASSERT_OK(pl_used_1.PrependHit(hit));
+ }
+ EXPECT_THAT(pl_used_1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
+ hits_in_posting_list1.rend())));
+
+ // Add hits to the second posting list.
+ std::vector<Hit> hits_in_posting_list2{
+ Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore),
+ };
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2,
+ block.AllocatePostingList());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_2,
+ block.GetAllocatedPostingList(allocated_index_2));
+ for (const Hit& hit : hits_in_posting_list2) {
+ ICING_ASSERT_OK(pl_used_2.PrependHit(hit));
+ }
+ EXPECT_THAT(pl_used_2.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
+ hits_in_posting_list2.rend())));
+
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_FALSE(block.has_free_posting_lists());
+
+ // Now free the first posting list. Then, reallocate it and fill it with a
+ // different set of hits.
+ block.FreePostingList(allocated_index_1);
+ EXPECT_TRUE(block.has_free_posting_lists());
+
+ std::vector<Hit> hits_in_posting_list3{
+ Hit(/*section_id=*/12, /*document_id=*/0, /*score=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/1, Hit::kMaxHitScore),
+ Hit(/*section_id=*/0, /*document_id=*/2, /*score=*/2),
+ };
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3,
+ block.AllocatePostingList());
+ EXPECT_THAT(allocated_index_3, Eq(allocated_index_1));
+ ICING_ASSERT_OK_AND_ASSIGN(pl_used_1,
+ block.GetAllocatedPostingList(allocated_index_3));
+ for (const Hit& hit : hits_in_posting_list3) {
+ ICING_ASSERT_OK(pl_used_1.PrependHit(hit));
+ }
+ EXPECT_THAT(pl_used_1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits_in_posting_list3.rbegin(),
+ hits_in_posting_list3.rend())));
+ EXPECT_THAT(block.AllocatePostingList(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_FALSE(block.has_free_posting_lists());
+}
+
+TEST(IndexBlockTest, IndexBlockNextBlockIndex) {
+ constexpr int kPostingListBytes = 2000;
+ constexpr int kSomeBlockIndex = 22;
+
+ Filesystem filesystem;
+ std::string flash_file = GetTestTempDir() + "/flash/0";
+ // Grow the file by one block for the IndexBlock to use.
+ ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
+
+ {
+ // Create an IndexBlock from this newly allocated file block and set the
+ // next block index.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize,
+ kPostingListBytes));
+ EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex));
+ block.set_next_block_index(kSomeBlockIndex);
+ EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex));
+ }
+ {
+ // Create an IndexBlock from this previously allocated file block and make
+ // sure that next_block_index is still set properly.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block,
+ IndexBlock::CreateFromPreexistingIndexBlockRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize));
+ EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex));
+ }
+ {
+ // Create an IndexBlock, treating this file block as uninitialized. This
+ // reset the next_block_index to kInvalidBlockIndex.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
+ filesystem, flash_file, /*offset=*/0, kBlockSize,
+ kPostingListBytes));
+ EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex));
+ }
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/posting-list-free.h b/icing/index/main/posting-list-free.h
index a2eba82..4b27401 100644
--- a/icing/index/posting-list-free.h
+++ b/icing/index/main/posting-list-free.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_POSTING_LIST_FREE_H_
-#define ICING_INDEX_POSTING_LIST_FREE_H_
+#ifndef ICING_INDEX_MAIN_POSTING_LIST_FREE_H_
+#define ICING_INDEX_MAIN_POSTING_LIST_FREE_H_
#include <string.h>
#include <sys/mman.h>
@@ -23,7 +23,7 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/hit/hit.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-utils.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -33,7 +33,7 @@ namespace lib {
// A FlashIndexBlock can contain multiple posting lists. This specifies which
// PostingList in the FlashIndexBlock we want to refer to.
-using PostingListIndex = uint32_t;
+using PostingListIndex = int32_t;
inline constexpr PostingListIndex kInvalidPostingListIndex = ~0U;
// A posting list in the index block's free list.
@@ -126,4 +126,4 @@ class PostingListFree {
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_POSTING_LIST_FREE_H_
+#endif // ICING_INDEX_MAIN_POSTING_LIST_FREE_H_
diff --git a/icing/index/posting-list-free_test.cc b/icing/index/main/posting-list-free_test.cc
index 80b8957..a152934 100644
--- a/icing/index/posting-list-free_test.cc
+++ b/icing/index/main/posting-list-free_test.cc
@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/posting-list-free.h"
+#include "icing/index/main/posting-list-free.h"
#include <cstdint>
#include <memory>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gtest/gtest.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-utils.h"
#include "icing/testing/common-matchers.h"
namespace icing {
diff --git a/icing/index/posting-list-used.cc b/icing/index/main/posting-list-used.cc
index 708b13b..a439c45 100644
--- a/icing/index/posting-list-used.cc
+++ b/icing/index/main/posting-list-used.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/posting-list-used.h"
+#include "icing/index/main/posting-list-used.h"
#include <algorithm>
#include <cinttypes>
@@ -20,7 +20,7 @@
#include <limits>
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-utils.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/index/icing-bit-util.h"
#include "icing/util/status-macros.h"
@@ -57,7 +57,10 @@ PostingListUsed::CreateFromUnitializedRegion(void *posting_list_buffer,
return posting_list_used;
}
-void PostingListUsed::Clear() { set_start_byte_offset(size_in_bytes_); }
+void PostingListUsed::Clear() {
+ // Safe to ignore return value because size_in_bytes_ a valid argument.
+ set_start_byte_offset(size_in_bytes_);
+}
libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) {
ICING_RETURN_ERROR_IF_NULL(other);
@@ -71,7 +74,7 @@ libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) {
return absl_ports::FailedPreconditionError(
"This posting list is in an invalid state and can't be used!");
}
- if (other->IsPostingListValid()) {
+ if (!other->IsPostingListValid()) {
return absl_ports::InvalidArgumentError(
"Cannot MoveFrom an invalid posting list!");
}
@@ -82,7 +85,7 @@ libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) {
while (other->full() || other->almost_full() ||
(size_in_bytes_ - posting_list_utils::kSpecialHitsSize <
other->BytesUsed())) {
- if (other->GetHitsInternal(/*limit=*/1, /*pop=*/true, &hits) != 1) {
+ if (!other->GetHitsInternal(/*limit=*/1, /*pop=*/true, &hits).ok()) {
return absl_ports::AbortedError(
"Unable to retrieve hits from other posting list.");
}
@@ -96,7 +99,7 @@ libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) {
// Because we popped all hits from other outside of the compressed area and we
// guaranteed that other->BytesUsed is less than size_in_bytes_ -
// kSpecialHitSize. This is guaranteed to be a valid byte offset for the
- // NOT_FULL state.
+ // NOT_FULL state, so ignoring the value is safe.
set_start_byte_offset(size_in_bytes_ - other->BytesUsed());
// Put back remaining hits.
@@ -127,20 +130,22 @@ uint32_t PostingListUsed::GetPadEnd(uint32_t offset) const {
return pad_end;
}
-void PostingListUsed::PadToEnd(uint32_t start, uint32_t end) {
+bool PostingListUsed::PadToEnd(uint32_t start, uint32_t end) {
if (end > size_in_bytes_) {
ICING_LOG(ERROR) << "Cannot pad a region that ends after size!";
- return;
+ return false;
}
// In VarInt a value of 0 encodes to 0.
memset(posting_list_buffer_ + start, 0, end - start);
+ return true;
}
libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull(
const Hit &hit) {
// Get delta between first hit and the new hit. Try to fit delta
// in the padded area and put new hit at the special position 1.
- Hit cur = get_special_hit(1);
+ // Calling ValueOrDie is safe here because 1 < kNumSpecialHits.
+ Hit cur = get_special_hit(1).ValueOrDie();
if (cur.value() <= hit.value()) {
return absl_ports::InvalidArgumentError(
"Hit being prepended must be strictly less than the most recent Hit");
@@ -164,12 +169,15 @@ libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull(
uint8_t *score_offset = delta_offset + delta_len;
memcpy(score_offset, &score, cur_score_bytes);
- // Now first hit is the new hit, at special position 1.
+ // Now first hit is the new hit, at special position 1. Safe to ignore the
+ // return value because 1 < kNumSpecialHits.
set_special_hit(1, hit);
+ // Safe to ignore the return value because sizeof(Hit) is a valid argument.
set_start_byte_offset(sizeof(Hit));
} else {
// No space for delta. We put the new hit at special position 0
- // and go to the full state.
+ // and go to the full state. Safe to ignore the return value because 1 <
+ // kNumSpecialHits.
set_special_hit(0, hit);
}
return libtextclassifier3::Status::OK;
@@ -178,13 +186,17 @@ libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull(
void PostingListUsed::PrependHitToEmpty(const Hit &hit) {
// First hit to be added. Just add verbatim, no compression.
if (size_in_bytes_ == posting_list_utils::kSpecialHitsSize) {
+ // Safe to ignore the return value because 1 < kNumSpecialHits
set_special_hit(1, hit);
+ // Safe to ignore the return value because sizeof(Hit) is a valid argument.
set_start_byte_offset(sizeof(Hit));
} else {
// Since this is the first hit, size != kSpecialHitsSize and
// size % sizeof(Hit) == 0, we know that there is room to fit 'hit' into
- // the compressed region.
- uint32_t offset = PrependHitUncompressed(hit, size_in_bytes_);
+ // the compressed region, so ValueOrDie is safe.
+ uint32_t offset = PrependHitUncompressed(hit, size_in_bytes_).ValueOrDie();
+ // Safe to ignore the return value because PrependHitUncompressed is
+ // guaranteed to return a valid offset.
set_start_byte_offset(offset);
}
}
@@ -226,12 +238,14 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
memcpy(posting_list_buffer_ + offset, delta_buf, delta_len);
// Prepend new hit with (possibly) its score. We know that there is room
- // for 'hit' because of the if statement above.
- offset = PrependHitUncompressed(hit, offset);
- // offset is guaranteed to be valid here. The if above will guarantee that
- // offset >= kSpecialHitSize and < size_in_bytes_ because the if ensures
- // that there is enough room between offset and kSpecialHitSize to fit the
- // delta of the previous hit, any score and the uncompressed hit.
+ // for 'hit' because of the if statement above, so calling ValueOrDie is
+ // safe.
+ offset = PrependHitUncompressed(hit, offset).ValueOrDie();
+ // offset is guaranteed to be valid here. So it's safe to ignore the return
+ // value. The if above will guarantee that offset >= kSpecialHitSize and <
+ // size_in_bytes_ because the if ensures that there is enough room between
+ // offset and kSpecialHitSize to fit the delta of the previous hit, any
+ // score and the uncompressed hit.
set_start_byte_offset(offset);
} else if (posting_list_utils::kSpecialHitsSize + delta_len <= offset) {
// Only have space for delta. The new hit must be put in special
@@ -241,13 +255,17 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
offset -= delta_len;
memcpy(posting_list_buffer_ + offset, delta_buf, delta_len);
- // Prepend pad.
+ // Prepend pad. Safe to ignore the return value of PadToEnd because offset
+ // must be less than size_in_bytes_. Otherwise, this function already would
+ // have returned FAILED_PRECONDITION.
PadToEnd(posting_list_utils::kSpecialHitsSize, offset);
- // Put new hit in special position 1.
+ // Put new hit in special position 1. Safe to ignore return value because 1
+ // < kNumSpecialHits.
set_special_hit(1, hit);
- // State almost_full.
+ // State almost_full. Safe to ignore the return value because sizeof(Hit) is
+ // a valid argument.
set_start_byte_offset(sizeof(Hit));
} else {
// Very rare case where delta is larger than sizeof(Hit::Value)
@@ -256,10 +274,18 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
// special position 0.
Hit cur(cur_value);
if (cur.has_score()) {
- cur = Hit(cur_value, ReadScore(offset));
+ // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
+ // Therefore, offset must be less than kSpecialHitSize + 5. Since posting
+ // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
+ // offset < size_in_bytes, so it is safe to call ValueOrDie here.
+ cur = Hit(cur_value, ReadScore(offset).ValueOrDie());
offset += sizeof(Hit::Score);
}
+ // Safe to ignore the return value of PadToEnd because offset must be less
+ // than size_in_bytes_. Otherwise, this function already would have returned
+ // FAILED_PRECONDITION.
PadToEnd(posting_list_utils::kSpecialHitsSize, offset);
+ // Safe to ignore the return value here because 0 and 1 < kNumSpecialHits.
set_special_hit(1, cur);
set_special_hit(0, hit);
}
@@ -292,18 +318,20 @@ libtextclassifier3::Status PostingListUsed::PrependHit(const Hit &hit) {
}
}
-std::vector<Hit> PostingListUsed::GetHits() const {
+libtextclassifier3::StatusOr<std::vector<Hit>> PostingListUsed::GetHits()
+ const {
std::vector<Hit> hits_out;
- GetHits(&hits_out);
+ ICING_RETURN_IF_ERROR(GetHits(&hits_out));
return hits_out;
}
-void PostingListUsed::GetHits(std::vector<Hit> *hits_out) const {
- GetHitsInternal(/*limit=*/std::numeric_limits<uint32_t>::max(), /*pop=*/false,
- hits_out);
+libtextclassifier3::Status PostingListUsed::GetHits(
+ std::vector<Hit> *hits_out) const {
+ return GetHitsInternal(/*limit=*/std::numeric_limits<uint32_t>::max(),
+ /*pop=*/false, hits_out);
}
-void PostingListUsed::PopFrontHits(uint32_t num_hits) {
+libtextclassifier3::Status PostingListUsed::PopFrontHits(uint32_t num_hits) {
if (num_hits == 1 && full()) {
// The PL is in full status which means that we save 2 uncompressed hits in
// the 2 special postions. But full status may be reached by 2 different
@@ -358,7 +386,7 @@ void PostingListUsed::PopFrontHits(uint32_t num_hits) {
// Popping 2 hits should never fail because we've just ensured that the
// posting list is in the FULL state.
- GetHitsInternal(/*limit=*/2, /*pop=*/true, &out);
+ ICING_RETURN_IF_ERROR(GetHitsInternal(/*limit=*/2, /*pop=*/true, &out));
// PrependHit should never fail because out[1] is a valid hit less than
// previous hits in the posting list and because there's no way that the
@@ -366,12 +394,13 @@ void PostingListUsed::PopFrontHits(uint32_t num_hits) {
// AND another hit.
PrependHit(out[1]);
} else if (num_hits > 0) {
- GetHitsInternal(/*limit=*/num_hits, /*pop=*/true, nullptr);
+ return GetHitsInternal(/*limit=*/num_hits, /*pop=*/true, nullptr);
}
+ return libtextclassifier3::Status::OK;
}
-uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop,
- std::vector<Hit> *out) const {
+libtextclassifier3::Status PostingListUsed::GetHitsInternal(
+ uint32_t limit, bool pop, std::vector<Hit> *out) const {
// Put current uncompressed val here.
Hit::Value val = Hit::kInvalidValue;
uint32_t offset = get_start_byte_offset();
@@ -379,7 +408,9 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop,
// First traverse the first two special positions.
while (count < limit && offset < posting_list_utils::kSpecialHitsSize) {
- Hit hit = get_special_hit(offset / sizeof(Hit));
+ // Calling ValueOrDie is safe here because offset / sizeof(Hit) <
+ // kNumSpecialHits because of the check above.
+ Hit hit = get_special_hit(offset / sizeof(Hit)).ValueOrDie();
val = hit.value();
if (out != nullptr) {
out->push_back(hit);
@@ -407,7 +438,16 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop,
}
Hit hit(val);
if (hit.has_score()) {
- hit = Hit(val, ReadScore(offset));
+ auto score_or = ReadScore(offset);
+ if (!score_or.ok()) {
+ // This posting list has been corrupted somehow. The first hit of the
+ // posting list claims to have a score, but there's no more room in the
+ // posting list for that score to exist. Return an empty vector and zero
+ // to indicate no hits retrieved.
+ out->clear();
+ return absl_ports::InternalError("Posting list has been corrupted!");
+ }
+ hit = Hit(val, score_or.ValueOrDie());
offset += sizeof(Hit::Score);
}
if (out != nullptr) {
@@ -435,18 +475,33 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop,
offset -= sizeof(Hit::Value);
memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value));
} else {
- // val won't fit in compressed area. Also see if there is a
- // score.
+ // val won't fit in compressed area. Also see if there is a score.
Hit hit(val);
if (hit.has_score()) {
- hit = Hit(val, ReadScore(offset));
+ auto score_or = ReadScore(offset);
+ if (!score_or.ok()) {
+ // This posting list has been corrupted somehow. The first hit of
+ // the posting list claims to have a score, but there's no more room
+ // in the posting list for that score to exist. Return an empty
+ // vector and zero to indicate no hits retrieved. Do not pop
+ // anything.
+ out->clear();
+ return absl_ports::InternalError(
+ "Posting list has been corrupted!");
+ }
+ hit = Hit(val, score_or.ValueOrDie());
}
+ // Okay to ignore the return value here because 1 < kNumSpecialHits.
mutable_this->set_special_hit(1, hit);
+
+ // Prepend pad. Safe to ignore the return value of PadToEnd because
+ // offset must be less than size_in_bytes_ thanks to the if above.
mutable_this->PadToEnd(posting_list_utils::kSpecialHitsSize, offset);
offset = sizeof(Hit);
}
}
- // offset is guaranteed to be valid. It falls into one of four scenarios:
+ // offset is guaranteed to be valid so ignoring the return value of
+ // set_start_byte_offset is safe. It falls into one of four scenarios:
// Scenario 1: the above if was false because offset is not < size_in_bytes_
// In this case, offset must be == size_in_bytes_ because we reached
// offset by unwinding hits on the posting list.
@@ -466,26 +521,28 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop,
mutable_this->set_start_byte_offset(offset);
}
- return count;
+ return libtextclassifier3::Status::OK;
}
-Hit PostingListUsed::get_special_hit(uint32_t index) const {
+libtextclassifier3::StatusOr<Hit> PostingListUsed::get_special_hit(
+ uint32_t index) const {
static_assert(sizeof(Hit::Value) >= sizeof(uint32_t), "HitTooSmall");
- if (index >= posting_list_utils::kSpecialHitsSize / sizeof(Hit)) {
- ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1";
- return Hit();
+ if (index >= posting_list_utils::kNumSpecialHits || index < 0) {
+ return absl_ports::InvalidArgumentError(
+ "Special hits only exist at indices 0 and 1");
}
Hit val;
memcpy(&val, posting_list_buffer_ + index * sizeof(val), sizeof(val));
return val;
}
-void PostingListUsed::set_special_hit(uint32_t index, const Hit &val) {
- if (index >= posting_list_utils::kSpecialHitsSize / sizeof(Hit)) {
+bool PostingListUsed::set_special_hit(uint32_t index, const Hit &val) {
+ if (index >= posting_list_utils::kNumSpecialHits || index < 0) {
ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1";
- return;
+ return false;
}
memcpy(posting_list_buffer_ + index * sizeof(val), &val, sizeof(val));
+ return true;
}
uint32_t PostingListUsed::BytesUsed() const {
@@ -515,17 +572,20 @@ uint32_t PostingListUsed::MinPostingListSizeToFit() const {
bool PostingListUsed::IsPostingListValid() const {
if (almost_full()) {
- // Special Hit 1 should hold a Hit.
- if (!get_special_hit(1).is_valid()) {
+ // Special Hit 1 should hold a Hit. Calling ValueOrDie is safe because we
+ // know that 1 < kNumSpecialHits.
+ if (!get_special_hit(1).ValueOrDie().is_valid()) {
ICING_LOG(ERROR)
<< "Both special hits cannot be invalid at the same time.";
return false;
}
} else if (!full()) {
- // NOT_FULL. Special Hit 0 should hold a valid offset.
- if (get_special_hit(0).value() > size_in_bytes_ ||
- get_special_hit(0).value() < posting_list_utils::kSpecialHitsSize) {
- ICING_LOG(ERROR) << "Hit: " << get_special_hit(0).value()
+ // NOT_FULL. Special Hit 0 should hold a valid offset. Calling ValueOrDie is
+ // safe because we know that 0 < kNumSpecialHits.
+ if (get_special_hit(0).ValueOrDie().value() > size_in_bytes_ ||
+ get_special_hit(0).ValueOrDie().value() <
+ posting_list_utils::kSpecialHitsSize) {
+ ICING_LOG(ERROR) << "Hit: " << get_special_hit(0).ValueOrDie().value()
<< " size: " << size_in_bytes_
<< " sp size: " << posting_list_utils::kSpecialHitsSize;
return false;
@@ -540,55 +600,57 @@ uint32_t PostingListUsed::get_start_byte_offset() const {
} else if (almost_full()) {
return sizeof(Hit);
} else {
- // NOT_FULL
- return get_special_hit(0).value();
+ // NOT_FULL, calling ValueOrDie is safe because we know that 0 <
+ // kNumSpecialHits.
+ return get_special_hit(0).ValueOrDie().value();
}
}
-void PostingListUsed::set_start_byte_offset(uint32_t offset) {
+bool PostingListUsed::set_start_byte_offset(uint32_t offset) {
if (offset > size_in_bytes_) {
ICING_LOG(ERROR) << "offset cannot be a value greater than size "
<< size_in_bytes_ << ". offset is " << offset << ".";
- return;
+ return false;
}
if (offset < posting_list_utils::kSpecialHitsSize && offset > sizeof(Hit)) {
ICING_LOG(ERROR) << "offset cannot be a value between (" << sizeof(Hit)
<< ", " << posting_list_utils::kSpecialHitsSize
<< "). offset is " << offset << ".";
- return;
+ return false;
}
if (offset < sizeof(Hit) && offset != 0) {
ICING_LOG(ERROR) << "offset cannot be a value between (0, " << sizeof(Hit)
<< "). offset is " << offset << ".";
- return;
+ return false;
}
if (offset >= posting_list_utils::kSpecialHitsSize) {
- // not_full state.
+ // not_full state. Safe to ignore the return value because 0 and 1 are both
+ // < kNumSpecialHits.
set_special_hit(0, Hit(offset));
set_special_hit(1, Hit());
} else if (offset == sizeof(Hit)) {
- // almost_full state.
+ // almost_full state. Safe to ignore the return value because 1 is both <
+ // kNumSpecialHits.
set_special_hit(0, Hit());
}
// Nothing to do for the FULL state - the offset isn't actually stored
// anywhere and both special hits hold valid hits.
+ return true;
}
-uint32_t PostingListUsed::PrependHitUncompressed(const Hit &hit,
- uint32_t offset) {
+libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed(
+ const Hit &hit, uint32_t offset) {
if (hit.has_score()) {
if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit)) {
- ICING_LOG(ERROR) << "Not enough room to prepend Hit at offset " << offset
- << ".";
- return offset;
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Not enough room to prepend Hit at offset %d.", offset));
}
offset -= sizeof(Hit);
memcpy(posting_list_buffer_ + offset, &hit, sizeof(Hit));
} else {
if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value)) {
- ICING_LOG(ERROR) << "Not enough room to prepend Hit::Value at offset "
- << offset << ".";
- return offset;
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Not enough room to prepend Hit::Value at offset %d.", offset));
}
offset -= sizeof(Hit::Value);
Hit::Value val = hit.value();
@@ -597,12 +659,12 @@ uint32_t PostingListUsed::PrependHitUncompressed(const Hit &hit,
return offset;
}
-Hit::Score PostingListUsed::ReadScore(uint32_t offset) const {
+libtextclassifier3::StatusOr<Hit::Score> PostingListUsed::ReadScore(
+ uint32_t offset) const {
if (offset + sizeof(Hit::Score) > size_in_bytes_) {
- ICING_LOG(FATAL)
- << "offset " << offset
- << " must not point past the end of the posting list of size "
- << size_in_bytes_ << ".";
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "offset %d must not point past the end of the posting list of size %d.",
+ offset, size_in_bytes_));
}
Hit::Score score;
memcpy(&score, posting_list_buffer_ + offset, sizeof(Hit::Score));
diff --git a/icing/index/posting-list-used.h b/icing/index/main/posting-list-used.h
index 492435b..8bc9c8d 100644
--- a/icing/index/posting-list-used.h
+++ b/icing/index/main/posting-list-used.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_POSTING_LIST_USED_H_
-#define ICING_INDEX_POSTING_LIST_USED_H_
+#ifndef ICING_INDEX_MAIN_POSTING_LIST_USED_H_
+#define ICING_INDEX_MAIN_POSTING_LIST_USED_H_
#include <string.h>
#include <sys/mman.h>
@@ -24,7 +24,7 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/hit/hit.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-utils.h"
#include "icing/util/logging.h"
namespace icing {
@@ -97,15 +97,27 @@ class PostingListUsed {
uint32_t PrependHitArray(const T *array, uint32_t num_hits,
bool keep_prepended);
- // Return hits sorted by the reverse order of prepending.
- std::vector<Hit> GetHits() const;
+ // Retrieves the hits stored in the posting list.
+ //
+ // RETURNS:
+ // - On success, a vector of hits sorted by the reverse order of prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::StatusOr<std::vector<Hit>> GetHits() const;
// Same as GetHits but appends hits to hits_out.
- void GetHits(std::vector<Hit> *hits_out) const;
+ //
+ // RETURNS:
+ // - On success, a vector of hits sorted by the reverse order of prepending.
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetHits(std::vector<Hit> *hits_out) const;
// Undo the last num_hits hits prepended. If num_hits > number of
// hits we clear all hits.
- void PopFrontHits(uint32_t num_hits);
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status PopFrontHits(uint32_t num_hits);
// Returns bytes used by actual hits.
uint32_t BytesUsed() const;
@@ -194,12 +206,15 @@ class PostingListUsed {
// Helpers to determine what state the posting list is in.
bool full() const {
- return get_special_hit(0).is_valid() && get_special_hit(1).is_valid();
+ return get_special_hit(0).ValueOrDie().is_valid() &&
+ get_special_hit(1).ValueOrDie().is_valid();
+ }
+ bool almost_full() const {
+ return !get_special_hit(0).ValueOrDie().is_valid();
}
- bool almost_full() const { return !get_special_hit(0).is_valid(); }
bool empty() const {
- return get_special_hit(0).value() == size_in_bytes_ &&
- !get_special_hit(1).is_valid();
+ return get_special_hit(0).ValueOrDie().value() == size_in_bytes_ &&
+ !get_special_hit(1).ValueOrDie().is_valid();
}
// Returns false if both special hits are invalid or if the offset value
@@ -236,10 +251,10 @@ class PostingListUsed {
// Sets the special hits to properly reflect what offset is (see layout
// comment for further details).
- // If offset > size_in_bytes_ or offset is (kSpecialHitsSize, sizeof(Hit)) or
- // offset is (sizeof(Hit), 0), then offset is considered invalid and this
- // function has no effect.
- void set_start_byte_offset(uint32_t offset);
+ //
+ // Returns false if offset > size_in_bytes_ or offset is (kSpecialHitsSize,
+ // sizeof(Hit)) or offset is (sizeof(Hit), 0). True, otherwise.
+ bool set_start_byte_offset(uint32_t offset);
// Manipulate padded areas. We never store the same hit value twice
// so a delta of 0 is a pad byte.
@@ -247,42 +262,53 @@ class PostingListUsed {
// Returns offset of first non-pad byte.
uint32_t GetPadEnd(uint32_t offset) const;
- // Fill padding between offset start and offset end with 0s. If end >
- // size_in_bytes_, this function has no effect.
- void PadToEnd(uint32_t start, uint32_t end);
+ // Fill padding between offset start and offset end with 0s.
+ // Returns false if end > size_in_bytes_. True, otherwise.
+ bool PadToEnd(uint32_t start, uint32_t end);
- // Helper for AppendHits/PopFrontHits. Returns number actually traversed (also
- // the size of out if non-NULL), which will always be equal to 'limit' unless
- // there are fewer than 'limit' hits in the posting list. out can be NULL.
+ // Helper for AppendHits/PopFrontHits. Adds limit number of hits to out or all
+ // hits in the posting list if the posting list contains less than limit
+ // number of hits. out can be NULL.
//
// NOTE: If called with limit=1, pop=true on a posting list that transitioned
// from NOT_FULL directly to FULL, GetHitsInternal will not return the posting
// list to NOT_FULL. Instead it will leave it in a valid state, but it will be
// ALMOST_FULL.
- uint32_t GetHitsInternal(uint32_t limit, bool pop,
- std::vector<Hit> *out) const;
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if the posting list has been corrupted somehow.
+ libtextclassifier3::Status GetHitsInternal(uint32_t limit, bool pop,
+ std::vector<Hit> *out) const;
- // Retrieves the value stored in the index-th special hit. If index is not
- // less than kSpecialHitSize / sizeof(Hit), returns Hit::kInvalidValue.
- Hit get_special_hit(uint32_t index) const;
+ // Retrieves the value stored in the index-th special hit.
+ //
+ // RETURNS:
+ // - A valid Hit, on success
+ // - INVALID_ARGUMENT if index is not less than kNumSpecialHits
+ libtextclassifier3::StatusOr<Hit> get_special_hit(uint32_t index) const;
// Sets the value stored in the index-th special hit to val. If index is not
// less than kSpecialHitSize / sizeof(Hit), this has no effect.
- void set_special_hit(uint32_t index, const Hit &val);
+ bool set_special_hit(uint32_t index, const Hit &val);
// Prepends hit to the memory region [offset - sizeof(Hit), offset] and
// returns the new beginning of the padded region.
//
- // If offset - kSpecialHitSize < sizeof(Hit/Hit::Value), then this function
- // has no effect.
- uint32_t PrependHitUncompressed(const Hit &hit, uint32_t offset);
+ // RETURNS:
+ // - The new beginning of the padded region, if successful.
+ // - INVALID_ARGUMENT if hit will not fit (uncompressed) between offset and
+ // kSpecialHitsSize
+ libtextclassifier3::StatusOr<uint32_t> PrependHitUncompressed(
+ const Hit &hit, uint32_t offset);
// Reads the score located at offset and returns it. Callers are responsible
// for ensuring that the bytes starting at offset actually represent a score.
//
- // REQUIRES: offset + sizeof(Hit::Score) < size_in_bytes_
- // REQUIRES enforced by CHECK
- Hit::Score ReadScore(uint32_t offset) const;
+ // RETURNS:
+ // - The score located at offset, if successful
+ // - INVALID_ARGUMENT if offset + sizeof(Hit::Score) >= size_in_bytes_
+ libtextclassifier3::StatusOr<Hit::Score> ReadScore(uint32_t offset) const;
// A byte array of size size_in_bytes_ containing encoded hits for this
// posting list.
@@ -318,4 +344,4 @@ uint32_t PostingListUsed::PrependHitArray(const T *array, uint32_t num_hits,
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_POSTING_LIST_USED_H_
+#endif // ICING_INDEX_MAIN_POSTING_LIST_USED_H_
diff --git a/icing/index/posting-list-used_test.cc b/icing/index/main/posting-list-used_test.cc
index a0e9514..eb62aeb 100644
--- a/icing/index/posting-list-used_test.cc
+++ b/icing/index/main/posting-list-used_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/posting-list-used.h"
+#include "icing/index/main/posting-list-used.h"
#include <fcntl.h>
#include <sys/stat.h>
@@ -32,18 +32,21 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-utils.h"
#include "icing/legacy/index/icing-bit-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/hit-test-utils.h"
-using std::min;
using std::reverse;
using std::vector;
using testing::ElementsAre;
using testing::ElementsAreArray;
+using testing::Eq;
using testing::IsEmpty;
+using testing::Le;
+using testing::Lt;
namespace icing {
namespace lib {
@@ -59,41 +62,6 @@ struct HitElt {
Hit hit;
};
-// Produces a vector with num_hits HitElts. When delta encoded each hit should
-// be 1 byte with a 1 byte Hit::Score.
-std::vector<HitElt> CreateHits(DocumentId start_docid, int num_hits) {
- std::vector<HitElt> hits;
- hits.reserve(num_hits);
- while (num_hits--) {
- Hit::Score score = (start_docid % 7) + 1;
- SectionId section_id = (start_docid + 2) % (kMaxSectionId + 1);
- hits.emplace_back(Hit(section_id, start_docid, score));
- ++start_docid;
- }
- std::reverse(hits.begin(), hits.end());
- return hits;
-}
-
-Hit CreateHit(Hit last_hit, int desired_byte_length) {
- Hit hit =
- (last_hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score())
- : Hit(last_hit.section_id() - 1, last_hit.document_id(),
- last_hit.score());
- uint8_t buf[5];
- while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
- desired_byte_length) {
- hit = (hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score())
- : Hit(hit.section_id() - 1, hit.document_id(), hit.score());
- }
- return hit;
-}
-
-DocumentId InvertDocumentId(DocumentId document_id) {
- return kMaxDocumentId - document_id;
-}
-
TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
static const int kNumHits = 2551;
static const size_t kHitsSize = kNumHits * sizeof(Hit);
@@ -109,16 +77,16 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
pl_used.PrependHit(hit0);
// Size = sizeof(uncompressed hit0)
int expected_size = sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
Hit hit1(/*section_id=*/0, 1, Hit::kMaxHitScore);
pl_used.PrependHit(hit1);
// Size = sizeof(uncompressed hit1)
// + sizeof(hit0-hit1) + sizeof(hit0::score)
expected_size += 2 + sizeof(Hit::Score);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
Hit hit2(/*section_id=*/0, 2, /*score=*/56);
pl_used.PrependHit(hit2);
@@ -126,8 +94,8 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
// + sizeof(hit1-hit2)
// + sizeof(hit0-hit1) + sizeof(hit0::score)
expected_size += 2;
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit2, hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
Hit hit3(/*section_id=*/0, 3, Hit::kMaxHitScore);
pl_used.PrependHit(hit3);
@@ -136,8 +104,9 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
// + sizeof(hit1-hit2)
// + sizeof(hit0-hit1) + sizeof(hit0::score)
expected_size += 2 + sizeof(Hit::Score);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit3, hit2, hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(),
+ IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
}
TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
@@ -161,8 +130,8 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
ICING_EXPECT_OK(pl_used.PrependHit(hit2));
// Size used will be 2+2+4=8 bytes
int expected_size = sizeof(Hit::Value) + 2 + 2;
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit2, hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
// Add one more hit to transition NOT_FULL -> ALMOST_FULL
Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3);
@@ -175,8 +144,9 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
// Because we're in ALMOST_FULL, the expected size is the size of the pl minus
// the one hit used to mark the posting list as ALMOST_FULL.
expected_size = kHitsSize - sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit3, hit2, hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(),
+ IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
// Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
@@ -185,8 +155,9 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
// a 2-byte delta. That delta will fit in the compressed region (which will
// now have 9 bytes in use), hit4 will be placed in one of the special hits
// and the posting list will remain in ALMOST_FULL.
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit4, hit3, hit2, hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(),
+ IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0)));
// Add one more hit to transition ALMOST_FULL -> FULL
Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
@@ -195,9 +166,9 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
// a 2-byte delta which will not fit in the compressed region. So hit4 will
// remain in one of the special hits and hit5 will occupy the other, making
// the posting list FULL.
- EXPECT_LE(pl_used.BytesUsed(), kHitsSize);
+ EXPECT_THAT(pl_used.BytesUsed(), Le(kHitsSize));
EXPECT_THAT(pl_used.GetHits(),
- ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0));
+ IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)));
// The posting list is FULL. Adding another hit should fail.
Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1);
@@ -214,8 +185,8 @@ TEST(PostingListTest, PostingListUsedMinSize) {
static_cast<void *>(hits_buf.get()),
posting_list_utils::min_posting_list_size()));
// PL State: EMPTY
- EXPECT_LE(pl_used.BytesUsed(), 0);
- EXPECT_THAT(pl_used.GetHits(), IsEmpty());
+ EXPECT_THAT(pl_used.BytesUsed(), Eq(0));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty()));
// Add a hit, PL should shift to ALMOST_FULL state
Hit hit0(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
@@ -223,8 +194,8 @@ TEST(PostingListTest, PostingListUsedMinSize) {
ICING_EXPECT_OK(pl_used.PrependHit(hit0));
// Size = sizeof(uncompressed hit0)
int expected_size = sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
// Add the smallest hit possible - no score and a delta of 1. PL should shift
// to FULL state.
@@ -233,16 +204,16 @@ TEST(PostingListTest, PostingListUsedMinSize) {
ICING_EXPECT_OK(pl_used.PrependHit(hit1));
// Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
expected_size += sizeof(Hit);
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
// Try to add the smallest hit possible. Should fail
Hit hit2(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false);
EXPECT_THAT(pl_used.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_LE(pl_used.BytesUsed(), expected_size);
- EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0));
+ EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
}
TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) {
@@ -271,7 +242,7 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) {
// only fit two hits. So PrependHitArray should fail.
uint32_t num_can_prepend = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
&hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_can_prepend, 2);
+ EXPECT_THAT(num_can_prepend, Eq(2));
int can_fit_hits = num_can_prepend;
// The PL has room for 2 hits. We should be able to add them without any
@@ -279,13 +250,13 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) {
const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2);
num_can_prepend = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
hits_in_ptr, can_fit_hits, false);
- EXPECT_EQ(num_can_prepend, can_fit_hits);
- EXPECT_EQ(size, pl_used.BytesUsed());
+ EXPECT_THAT(num_can_prepend, Eq(can_fit_hits));
+ EXPECT_THAT(size, Eq(pl_used.BytesUsed()));
std::deque<Hit> hits_pushed;
std::transform(hits_in.rbegin(),
hits_in.rend() - hits_in.size() + can_fit_hits,
std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
@@ -325,12 +296,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
// five hits without issue, transitioning the PL from EMPTY -> NOT_FULL.
uint32_t num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
&hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(byte_size, pl_used.BytesUsed());
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(byte_size, Eq(pl_used.BytesUsed()));
std::deque<Hit> hits_pushed;
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed)));
Hit first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
hits_in.clear();
@@ -369,12 +340,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
// remain in the NOT_FULL state.
num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
&hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(byte_size, pl_used.BytesUsed());
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(byte_size, Eq(pl_used.BytesUsed()));
// All hits from hits_in were added.
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed)));
first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/3);
hits_in.clear();
@@ -402,12 +373,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
// unused space.
num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
&hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(byte_size, pl_used.BytesUsed());
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(byte_size, Eq(pl_used.BytesUsed()));
// All hits from hits_in were added.
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed)));
first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
hits_in.clear();
@@ -441,12 +412,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
// (1 byte).
num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
&hits_in[0], hits_in.size(), false);
- EXPECT_EQ(num_could_fit, hits_in.size());
- EXPECT_EQ(size, pl_used.BytesUsed());
+ EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
+ EXPECT_THAT(size, Eq(pl_used.BytesUsed()));
// All hits from hits_in were added.
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
- EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed));
+ EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) {
@@ -459,30 +430,35 @@ TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) {
std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
// Create an array with one too many hits
- vector<HitElt> hits_in_too_many = CreateHits(0, kNumHits + 1);
+ vector<Hit> hits_in_too_many =
+ CreateHits(kNumHits + 1, /*desired_byte_length=*/1);
+ vector<HitElt> hit_elts_in_too_many;
+ for (const Hit &hit : hits_in_too_many) {
+ hit_elts_in_too_many.emplace_back(hit);
+ }
ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
static_cast<void *>(hits_buf.get()),
posting_list_utils::min_posting_list_size()));
- // PrependHitArray should fail because hits_in_too_many is far too large for
- // the minimum size pl.
+ // PrependHitArray should fail because hit_elts_in_too_many is far too large
+ // for the minimum size pl.
uint32_t num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in_too_many[0], hits_in_too_many.size(), false);
- ASSERT_LT(num_could_fit, hits_in_too_many.size());
- ASSERT_EQ(pl_used.BytesUsed(), 0);
- ASSERT_THAT(pl_used.GetHits(), testing::IsEmpty());
+ &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), false);
+ ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
+ ASSERT_THAT(pl_used.BytesUsed(), Eq(0));
+ ASSERT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty()));
ICING_ASSERT_OK_AND_ASSIGN(
pl_used, PostingListUsed::CreateFromUnitializedRegion(
static_cast<void *>(hits_buf.get()), kHitsSize));
- // PrependHitArray should fail because hits_in_too_many is one hit too large
- // for this pl.
+ // PrependHitArray should fail because hit_elts_in_too_many is one hit too
+ // large for this pl.
num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>(
- &hits_in_too_many[0], hits_in_too_many.size(), false);
- ASSERT_LT(num_could_fit, hits_in_too_many.size());
- ASSERT_EQ(pl_used.BytesUsed(), 0);
- ASSERT_THAT(pl_used.GetHits(), testing::IsEmpty());
+ &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), false);
+ ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
+ ASSERT_THAT(pl_used.BytesUsed(), Eq(0));
+ ASSERT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty()));
}
TEST(PostingListTest, PostingListStatusJumpFromNotFullToFullAndBack) {
@@ -494,13 +470,13 @@ TEST(PostingListTest, PostingListStatusJumpFromNotFullToFullAndBack) {
ICING_ASSERT_OK(pl.PrependHit(Hit(Hit::kInvalidValue - 1, 0)));
uint32_t bytes_used = pl.BytesUsed();
// Status not full.
- CHECK_LE(bytes_used, pl_size - posting_list_utils::kSpecialHitsSize);
+ ASSERT_THAT(bytes_used, Le(pl_size - posting_list_utils::kSpecialHitsSize));
ICING_ASSERT_OK(pl.PrependHit(Hit(Hit::kInvalidValue >> 2, 0)));
// Status should jump to full directly.
- CHECK_EQ(pl.BytesUsed(), pl_size);
+ ASSERT_THAT(pl.BytesUsed(), Eq(pl_size));
pl.PopFrontHits(1);
// Status should return to not full as before.
- CHECK_EQ(pl.BytesUsed(), bytes_used);
+ ASSERT_THAT(pl.BytesUsed(), Eq(bytes_used));
}
TEST(PostingListTest, DeltaOverflow) {
@@ -533,5 +509,150 @@ TEST(PostingListTest, DeltaOverflow) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
+TEST(PostingListTest, MoveFrom) {
+ int size = 3 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(pl_used1.PrependHit(hit));
+ }
+
+ std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf2.get()), size));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(pl_used2.PrependHit(hit));
+ }
+
+ ICING_ASSERT_OK(pl_used2.MoveFrom(&pl_used1));
+ EXPECT_THAT(pl_used2.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+ EXPECT_THAT(pl_used1.GetHits(), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListTest, MoveFromNullArgumentReturnsInvalidArgument) {
+ int size = 3 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+ std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits) {
+ ICING_ASSERT_OK(pl_used1.PrependHit(hit));
+ }
+
+ EXPECT_THAT(pl_used1.MoveFrom(/*other=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(pl_used1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits.rbegin(), hits.rend())));
+}
+
+TEST(PostingListTest, MoveFromInvalidPostingListReturnsInvalidArgument) {
+ int size = 3 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(pl_used1.PrependHit(hit));
+ }
+
+ std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf2.get()), size));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(pl_used2.PrependHit(hit));
+ }
+
+ // Write invalid hits to the beginning of pl_used1 to make it invalid.
+ Hit invalid_hit;
+ Hit *first_hit = reinterpret_cast<Hit *>(hits_buf1.get());
+ *first_hit = invalid_hit;
+ ++first_hit;
+ *first_hit = invalid_hit;
+ EXPECT_THAT(pl_used2.MoveFrom(&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(pl_used2.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+}
+
+TEST(PostingListTest, MoveToInvalidPostingListReturnsInvalidArgument) {
+ int size = 3 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(pl_used1.PrependHit(hit));
+ }
+
+ std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf2.get()), size));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(pl_used2.PrependHit(hit));
+ }
+
+ // Write invalid hits to the beginning of pl_used2 to make it invalid.
+ Hit invalid_hit;
+ Hit *first_hit = reinterpret_cast<Hit *>(hits_buf2.get());
+ *first_hit = invalid_hit;
+ ++first_hit;
+ *first_hit = invalid_hit;
+ EXPECT_THAT(pl_used2.MoveFrom(&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(pl_used1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+}
+
+TEST(PostingListTest, MoveToPostingListTooSmall) {
+ int size = 3 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+ std::vector<Hit> hits1 =
+ CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+ for (const Hit &hit : hits1) {
+ ICING_ASSERT_OK(pl_used1.PrependHit(hit));
+ }
+
+ std::unique_ptr<char[]> hits_buf2 =
+ std::make_unique<char[]>(posting_list_utils::min_posting_list_size());
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf2.get()),
+ posting_list_utils::min_posting_list_size()));
+ std::vector<Hit> hits2 =
+ CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2);
+ for (const Hit &hit : hits2) {
+ ICING_ASSERT_OK(pl_used2.PrependHit(hit));
+ }
+
+ EXPECT_THAT(pl_used2.MoveFrom(&pl_used1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(pl_used1.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
+ EXPECT_THAT(pl_used2.GetHits(),
+ IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/index/posting-list-utils.cc b/icing/index/main/posting-list-utils.cc
index b0e2929..b734767 100644
--- a/icing/index/posting-list-utils.cc
+++ b/icing/index/main/posting-list-utils.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/posting-list-utils.h"
+#include "icing/index/main/posting-list-utils.h"
#include "icing/legacy/index/icing-bit-util.h"
#include "icing/util/logging.h"
diff --git a/icing/index/posting-list-utils.h b/icing/index/main/posting-list-utils.h
index fc90d64..77537a7 100644
--- a/icing/index/posting-list-utils.h
+++ b/icing/index/main/posting-list-utils.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_POSTING_LIST_UTILS_H_
-#define ICING_INDEX_POSTING_LIST_UTILS_H_
+#ifndef ICING_INDEX_MAIN_POSTING_LIST_UTILS_H_
+#define ICING_INDEX_MAIN_POSTING_LIST_UTILS_H_
#include <cstdint>
@@ -26,7 +26,8 @@ namespace posting_list_utils {
// Represents the byte length of the two special hits described
// in the private section of posting-list-used.h.
-static constexpr uint32_t kSpecialHitsSize = sizeof(Hit) * 2;
+inline constexpr uint32_t kNumSpecialHits = 2;
+inline constexpr uint32_t kSpecialHitsSize = sizeof(Hit) * kNumSpecialHits;
constexpr uint32_t min_posting_list_size() { return kSpecialHitsSize; }
@@ -41,4 +42,4 @@ bool IsValidPostingListSize(uint32_t size_in_bytes);
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_POSTING_LIST_UTILS_H_
+#endif // ICING_INDEX_MAIN_POSTING_LIST_UTILS_H_
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index b1b5420..ac8d2eb 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -188,6 +188,18 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGet(
}
JNIEXPORT jbyteArray JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
+ JNIEnv* env, jclass clazz, jlong native_pointer) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(native_pointer);
+
+ icing::lib::GetAllNamespacesResultProto get_all_namespaces_result_proto =
+ icing->GetAllNamespaces();
+
+ return SerializeProtoToJniByteArray(env, get_all_namespaces_result_proto);
+}
+
+JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
JNIEnv* env, jclass clazz, jlong native_pointer,
jbyteArray search_spec_bytes, jbyteArray scoring_spec_bytes,
diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h
index 7136ef8..c33be96 100644
--- a/icing/legacy/index/icing-dynamic-trie.h
+++ b/icing/legacy/index/icing-dynamic-trie.h
@@ -569,6 +569,7 @@ class IcingDynamicTrie : public IIcingStorage {
class CandidateSet;
// For testing only.
+ friend class IcingDynamicTrieTest_TrieShouldRespectLimits_Test;
friend class IcingDynamicTrieTest_SyncErrorRecovery_Test;
friend class IcingDynamicTrieTest_BitmapsClosedWhenInitFails_Test;
void GetHeader(IcingDynamicTrieHeader *hdr) const;
diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc
new file mode 100644
index 0000000..4fae52a
--- /dev/null
+++ b/icing/legacy/index/icing-dynamic-trie_test.cc
@@ -0,0 +1,921 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-dynamic-trie.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/testing/tmp-directory.h"
+
+using testing::ElementsAre;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+constexpr std::string_view kKeys[] = {
+ "", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg",
+};
+constexpr uint32_t kNumKeys = ABSL_ARRAYSIZE(kKeys);
+
+class IcingDynamicTrieTest : public ::testing::Test {
+ protected:
+ class NewValueMap : public IcingDynamicTrie::NewValueMap {
+ public:
+ const void* GetNewValue(uint32_t old_value_index) const override {
+ const_cast<NewValueMap*>(this)->buf_ = old_value_index;
+ return &buf_;
+ }
+
+ private:
+ uint32_t buf_;
+ };
+
+ // Output trie stats to stderr.
+ static void StatsDump(const IcingDynamicTrie& trie) {
+ IcingDynamicTrie::Stats stats;
+ trie.CollectStats(&stats);
+ DLOG(INFO) << "Stats:\n" << stats.DumpStats(true);
+ }
+
+ static void AddToTrie(IcingDynamicTrie* trie, uint32_t num_keys) {
+ std::string key;
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ key.clear();
+ IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i);
+ bool inserted = trie->Insert(key.c_str(), &i);
+ ASSERT_TRUE(inserted);
+ }
+ }
+
+ static void CheckTrie(const IcingDynamicTrie& trie, uint32_t num_keys) {
+ std::string key;
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ key.clear();
+ IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i);
+ uint32_t val;
+ bool found = trie.Find(key.c_str(), &val);
+ EXPECT_TRUE(found);
+ EXPECT_EQ(i, val);
+ }
+ }
+
+ static void PrintTrie(const IcingDynamicTrie& trie) {
+ std::vector<std::string> keys;
+ std::ostringstream os;
+ DLOG(INFO) << "Trie:\n";
+ trie.DumpTrie(&os, &keys);
+ DLOG(INFO) << os.str();
+ }
+
+ void SetUp() override {
+ trie_files_dir_ = GetTestTempDir() + "/trie_files";
+ trie_files_prefix_ = trie_files_dir_ + "/test_file_";
+ }
+
+ void TearDown() override {
+ IcingFilesystem filesystem;
+ filesystem.DeleteDirectoryRecursively(trie_files_dir_.c_str());
+ }
+
+ std::string trie_files_dir_;
+ std::string trie_files_prefix_;
+};
+
+constexpr std::string_view kCommonEnglishWords[] = {
+ "that", "was", "for", "on", "are", "with", "they", "be", "at",
+ "one", "have", "this", "from", "word", "but", "what", "some", "you",
+ "had", "the", "and", "can", "out", "other", "were", "which", "their",
+ "time", "will", "how", "said", "each", "tell", "may", "three"};
+constexpr uint32_t kCommonEnglishWordArrayLen =
+ sizeof(kCommonEnglishWords) / sizeof(std::string_view);
+
+TEST_F(IcingDynamicTrieTest, Simple) {
+ // Test simple key insertions.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i));
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ EXPECT_EQ(trie.size(), kNumKeys);
+
+ StatsDump(trie);
+ std::vector<std::string> keys;
+ std::ostringstream os;
+ DLOG(INFO) << "Trie:\n";
+ trie.DumpTrie(&os, &keys);
+ DLOG(INFO) << os.str();
+ EXPECT_EQ(keys.size(), kNumKeys);
+}
+
+TEST_F(IcingDynamicTrieTest, Init) {
+ // Test create/init behavior.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ EXPECT_FALSE(trie.is_initialized());
+ EXPECT_FALSE(trie.Init());
+
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ EXPECT_TRUE(trie.Init());
+ EXPECT_TRUE(trie.is_initialized());
+}
+
+TEST_F(IcingDynamicTrieTest, Iterator) {
+ // Test iterator.
+ IcingFilesystem filesystem;
+ uint32_t val;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i));
+ }
+
+ // We try everything twice to test that Reset also works.
+
+ // Should get the entire trie.
+ IcingDynamicTrie::Iterator it_all(trie, "");
+ for (int i = 0; i < 2; i++) {
+ uint32_t count = 0;
+ for (; it_all.IsValid(); it_all.Advance()) {
+ uint32_t val_idx = it_all.GetValueIndex();
+ EXPECT_EQ(it_all.GetValue(), trie.GetValueAtIndex(val_idx));
+ count++;
+ }
+ EXPECT_EQ(count, kNumKeys);
+ it_all.Reset();
+ }
+
+ // Get everything under "a".
+ IcingDynamicTrie::Iterator it1(trie, "a");
+ for (int i = 0; i < 2; i++) {
+ ASSERT_TRUE(it1.IsValid());
+ EXPECT_STREQ(it1.GetKey(), "ab");
+ static const uint32_t kOne = 1;
+ ASSERT_TRUE(it1.GetValue() != nullptr);
+ EXPECT_TRUE(!memcmp(it1.GetValue(), &kOne, sizeof(kOne)));
+
+ ASSERT_TRUE(it1.Advance());
+ ASSERT_TRUE(it1.IsValid());
+ EXPECT_STREQ(it1.GetKey(), "abbb");
+
+ ASSERT_TRUE(it1.Advance());
+ ASSERT_TRUE(it1.IsValid());
+ EXPECT_STREQ(it1.GetKey(), "abcdefg");
+
+ ASSERT_TRUE(it1.Advance());
+ ASSERT_TRUE(it1.IsValid());
+ EXPECT_STREQ(it1.GetKey(), "abd");
+
+ ASSERT_TRUE(it1.Advance());
+ ASSERT_TRUE(it1.IsValid());
+ EXPECT_STREQ(it1.GetKey(), "ac");
+
+ EXPECT_FALSE(it1.Advance());
+ EXPECT_FALSE(it1.IsValid());
+
+ it1.Reset();
+ }
+
+ // Now "b".
+ IcingDynamicTrie::Iterator it2(trie, "b");
+ for (int i = 0; i < 2; i++) {
+ ASSERT_TRUE(it2.IsValid());
+ EXPECT_STREQ(it2.GetKey(), "bac");
+ val = 1;
+ ASSERT_TRUE(it1.GetValue() != nullptr);
+ EXPECT_TRUE(!memcmp(it1.GetValue(), &val, sizeof(val)));
+ val = 4;
+ ASSERT_TRUE(it2.GetValue() != nullptr);
+ EXPECT_TRUE(!memcmp(it2.GetValue(), &val, sizeof(val)));
+
+ ASSERT_TRUE(it2.Advance());
+ ASSERT_TRUE(it2.IsValid());
+ EXPECT_STREQ(it2.GetKey(), "bacd");
+
+ ASSERT_TRUE(it2.Advance());
+ ASSERT_TRUE(it2.IsValid());
+ EXPECT_STREQ(it2.GetKey(), "bb");
+
+ EXPECT_FALSE(it2.Advance());
+ EXPECT_FALSE(it2.IsValid());
+
+ it2.Reset();
+ }
+
+ // Get everything under "ab".
+ IcingDynamicTrie::Iterator it3(trie, "ab");
+ for (int i = 0; i < 2; i++) {
+ ASSERT_TRUE(it3.IsValid());
+ EXPECT_STREQ(it3.GetKey(), "ab");
+ val = 1;
+ ASSERT_TRUE(it3.GetValue() != nullptr);
+ EXPECT_TRUE(!memcmp(it3.GetValue(), &val, sizeof(val)));
+
+ ASSERT_TRUE(it3.Advance());
+ ASSERT_TRUE(it3.IsValid());
+ EXPECT_STREQ(it3.GetKey(), "abbb");
+
+ ASSERT_TRUE(it3.Advance());
+ ASSERT_TRUE(it3.IsValid());
+ EXPECT_STREQ(it3.GetKey(), "abcdefg");
+
+ ASSERT_TRUE(it3.Advance());
+ ASSERT_TRUE(it3.IsValid());
+ EXPECT_STREQ(it3.GetKey(), "abd");
+
+ EXPECT_FALSE(it3.Advance());
+ EXPECT_FALSE(it3.IsValid());
+
+ it3.Reset();
+ }
+
+ // Should match only one key exactly.
+ constexpr std::string_view kOneMatch[] = {
+ "abd",
+ "abcd",
+ "abcdef",
+ "abcdefg",
+ };
+ // With the following match:
+ constexpr std::string_view kOneMatchMatched[] = {
+ "abd",
+ "abcdefg",
+ "abcdefg",
+ "abcdefg",
+ };
+
+ for (size_t k = 0; k < ABSL_ARRAYSIZE(kOneMatch); k++) {
+ IcingDynamicTrie::Iterator it_single(trie, kOneMatch[k].data());
+ for (int i = 0; i < 2; i++) {
+ ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
+ EXPECT_STREQ(it_single.GetKey(), kOneMatchMatched[k].data());
+ EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
+ EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
+
+ it_single.Reset();
+ }
+ }
+
+ // Matches nothing.
+ constexpr std::string_view kNoMatch[] = {
+ "abbd",
+ "abcdeg",
+ "abcdefh",
+ };
+ for (size_t k = 0; k < ABSL_ARRAYSIZE(kNoMatch); k++) {
+ IcingDynamicTrie::Iterator it_empty(trie, kNoMatch[k].data());
+ for (int i = 0; i < 2; i++) {
+ EXPECT_FALSE(it_empty.IsValid());
+
+ it_empty.Reset();
+ }
+ }
+
+ // Clear.
+ trie.Clear();
+ EXPECT_FALSE(IcingDynamicTrie::Iterator(trie, "").IsValid());
+ EXPECT_EQ(0u, trie.size());
+ EXPECT_EQ(1.0, trie.min_free_fraction());
+}
+
+TEST_F(IcingDynamicTrieTest, Persistence) {
+ // Test persistence on the English dictionary.
+ IcingFilesystem filesystem;
+ {
+ // Test with a trie including strings in words. Test will fail if
+ // words are not unique.
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ EXPECT_FALSE(trie.Init());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &i));
+ }
+ // Explicitly omit sync.
+
+ StatsDump(trie);
+ }
+
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Init());
+ EXPECT_EQ(0U, trie.size());
+
+ for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &i));
+ }
+ trie.Sync();
+
+ StatsDump(trie);
+ }
+
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Init());
+
+ // Make sure we can find everything with the right value.
+ uint32_t found_count = 0;
+ uint32_t matched_count = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ uint32_t val;
+ bool found = trie.Find(kCommonEnglishWords[i].data(), &val);
+ if (found) {
+ found_count++;
+ if (i == val) {
+ matched_count++;
+ }
+ }
+ }
+ EXPECT_EQ(found_count, kCommonEnglishWordArrayLen);
+ EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen);
+
+ StatsDump(trie);
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, PersistenceShared) {
+ // Test persistence on the English dictionary.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie::RuntimeOptions ropt;
+
+ {
+ // Test with a trie including strings in words. Test will fail if
+ // words are not unique.
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
+ IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
+ EXPECT_FALSE(trie.Init());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t next_reopen = kCommonEnglishWordArrayLen / 16;
+ for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &i));
+
+ if (i == next_reopen) {
+ ASSERT_NE(0u, trie.UpdateCrc());
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+
+ next_reopen += next_reopen / 2;
+ }
+ }
+ // Explicitly omit sync. Shared should automatically persist.
+
+ StatsDump(trie);
+ }
+
+ // Go back and forth between the two policies.
+ for (int i = 0; i < 5; i++) {
+ if (i % 2 == 0) {
+ DLOG(INFO) << "Opening with map shared";
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
+ } else {
+ DLOG(INFO) << "Opening with explicit flush";
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kExplicitFlush;
+ }
+ IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
+ ASSERT_TRUE(trie.Init());
+
+ // Make sure we can find everything with the right value.
+ uint32_t found_count = 0;
+ uint32_t matched_count = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
+ uint32_t val;
+ bool found = trie.Find(kCommonEnglishWords[i].data(), &val);
+ if (found) {
+ found_count++;
+ if (i == val) {
+ matched_count++;
+ }
+ }
+ }
+ EXPECT_EQ(found_count, kCommonEnglishWordArrayLen);
+ EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen);
+
+ StatsDump(trie);
+ }
+
+ // Clear and re-open.
+ ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
+ IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
+ ASSERT_TRUE(trie.Init());
+ trie.Clear();
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+}
+
+TEST_F(IcingDynamicTrieTest, Sync) {
+ IcingFilesystem filesystem;
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i));
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ StatsDump(trie);
+ PrintTrie(trie);
+
+ trie.Sync();
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+ }
+
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ StatsDump(trie);
+ PrintTrie(trie);
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, LimitsZero) {
+ // Don't crash if we set limits to 0.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_FALSE(trie.CreateIfNotExist(IcingDynamicTrie::Options(0, 0, 0, 0)));
+}
+
+TEST_F(IcingDynamicTrieTest, LimitsSmall) {
+ // Test limits with a few keys.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(
+ IcingDynamicTrie::Options(10, 300, 30, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ ASSERT_LT(3U, kNumKeys);
+
+ for (uint32_t i = 0; i < 3; i++) {
+ ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i)) << i;
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ uint32_t val = 3;
+ EXPECT_FALSE(trie.Insert(kKeys[3].data(), &val));
+
+ StatsDump(trie);
+ PrintTrie(trie);
+}
+
+TEST_F(IcingDynamicTrieTest, DISABLEDFingerprintedKeys) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie::Options options(4 << 20, 4 << 20, 20 << 20,
+ sizeof(uint32_t));
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(options));
+ ASSERT_TRUE(trie.Init());
+ IcingDynamicTrie triefp(trie_files_prefix_ + ".fps",
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(triefp.CreateIfNotExist(options));
+ ASSERT_TRUE(triefp.Init());
+
+ static const uint32_t kNumKeys = 1000000;
+ std::string key;
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ key.clear();
+ IcingStringUtil::SStringAppendF(
+ &key, 1000, "content://gmail-ls/account/conversation/%u/message/%u", i,
+ 10 * i);
+ ASSERT_TRUE(trie.Insert(key.c_str(), &i));
+
+ // Now compute a fingerprint.
+ uint64_t fpkey = tc3farmhash::Fingerprint64(key);
+
+ // Convert to base255 since keys in trie cannot contain 0.
+ uint8_t fpkey_base255[9];
+ for (int j = 0; j < 8; j++) {
+ fpkey_base255[j] = (fpkey % 255) + 1;
+ fpkey /= 255;
+ }
+ fpkey_base255[8] = '\0';
+ ASSERT_TRUE(
+ triefp.Insert(reinterpret_cast<const char*>(fpkey_base255), &i));
+
+ // Sync periodically to gauge write locality.
+ if ((i + 1) % (kNumKeys / 10) == 0) {
+ DLOG(INFO) << "Trie sync";
+ trie.Sync();
+ DLOG(INFO) << "Trie fp sync";
+ triefp.Sync();
+ }
+ }
+
+ DLOG(INFO) << "Trie stats";
+ StatsDump(trie);
+ DLOG(INFO) << "Trie fp stats";
+ StatsDump(triefp);
+}
+
+TEST_F(IcingDynamicTrieTest, AddDups) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kNumKeys = 5000;
+ AddToTrie(&trie, kNumKeys);
+ CheckTrie(trie, kNumKeys);
+
+ DLOG(INFO) << "Trie stats";
+ StatsDump(trie);
+
+ AddToTrie(&trie, kNumKeys);
+ CheckTrie(trie, kNumKeys);
+ DLOG(INFO) << "Trie stats";
+ StatsDump(trie);
+}
+
+TEST_F(IcingDynamicTrieTest, Properties) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kOne = 1;
+ uint32_t val_idx;
+ trie.Insert("abcd", &kOne, &val_idx, false);
+ trie.SetProperty(val_idx, 0);
+ trie.SetProperty(val_idx, 3);
+
+ {
+ IcingDynamicTrie::PropertyReader reader(trie, 3);
+ ASSERT_TRUE(reader.Exists());
+ EXPECT_TRUE(reader.HasProperty(val_idx));
+ EXPECT_FALSE(reader.HasProperty(1000));
+ }
+
+ // Disappear after close.
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+ {
+ IcingDynamicTrie::PropertyReader reader(trie, 3);
+ EXPECT_FALSE(reader.HasProperty(val_idx));
+ }
+
+ // Persist after sync.
+ trie.Insert("abcd", &kOne, &val_idx, false);
+ trie.SetProperty(val_idx, 1);
+ ASSERT_TRUE(trie.Sync());
+ trie.Close();
+ ASSERT_TRUE(trie.Init());
+
+ uint32_t val;
+ ASSERT_TRUE(trie.Find("abcd", &val, &val_idx));
+ EXPECT_EQ(1u, val);
+ {
+ IcingDynamicTrie::PropertyReader reader(trie, 1);
+ EXPECT_TRUE(reader.HasProperty(val_idx));
+ }
+
+ // Get all.
+ {
+ IcingDynamicTrie::PropertyReadersAll readers(trie);
+ ASSERT_EQ(4u, readers.size());
+ EXPECT_TRUE(readers.Exists(0));
+ EXPECT_TRUE(readers.Exists(1));
+ EXPECT_FALSE(readers.Exists(2));
+ EXPECT_TRUE(readers.Exists(3));
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, ClearSingleProperty) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kOne = 1;
+ uint32_t val_idx[3];
+ trie.Insert("abcd", &kOne, &val_idx[0], false);
+ trie.SetProperty(val_idx[0], 0);
+ trie.SetProperty(val_idx[0], 3);
+
+ trie.Insert("efgh", &kOne, &val_idx[1], false);
+ trie.SetProperty(val_idx[1], 0);
+ trie.SetProperty(val_idx[1], 3);
+
+ trie.Insert("ijkl", &kOne, &val_idx[2], false);
+ trie.SetProperty(val_idx[2], 0);
+ trie.SetProperty(val_idx[2], 3);
+
+ {
+ IcingDynamicTrie::PropertyReadersAll readers(trie);
+ ASSERT_EQ(4u, readers.size());
+ EXPECT_TRUE(readers.Exists(0));
+ EXPECT_FALSE(readers.Exists(1));
+ EXPECT_FALSE(readers.Exists(2));
+ EXPECT_TRUE(readers.Exists(3));
+ for (size_t i = 0; i < readers.size(); i++) {
+ if (readers.Exists(i)) {
+ for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) {
+ EXPECT_TRUE(readers.HasProperty(i, val_idx[j]));
+ }
+ }
+ }
+ }
+
+ EXPECT_TRUE(trie.ClearPropertyForAllValues(3));
+
+ {
+ IcingDynamicTrie::PropertyReadersAll readers(trie);
+ ASSERT_EQ(4u, readers.size());
+ EXPECT_TRUE(readers.Exists(0));
+ EXPECT_FALSE(readers.Exists(1));
+ EXPECT_FALSE(readers.Exists(2));
+ // Clearing the property causes all values to be deleted.
+ EXPECT_FALSE(readers.Exists(3));
+ for (size_t i = 0; i < readers.size(); i++) {
+ for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) {
+ if (i == 0) {
+ EXPECT_TRUE(readers.HasProperty(i, val_idx[j]));
+ } else {
+ EXPECT_FALSE(readers.HasProperty(i, val_idx[j]));
+ }
+ }
+ }
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, Compact) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i));
+
+ uint32_t val;
+ bool found = trie.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
+ }
+
+ EXPECT_EQ(trie.size(), kNumKeys);
+
+ StatsDump(trie);
+ PrintTrie(trie);
+
+ IcingDynamicTrie trie2(trie_files_prefix_ + "-2",
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie2.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie2.Init());
+
+ std::unordered_map<uint32_t, uint32_t> old_to_new_tvi;
+ trie.Compact(NewValueMap(), &trie2, &old_to_new_tvi);
+ for (uint32_t i = 0; i < kNumKeys; i++) {
+ uint32_t val;
+ bool found = trie2.Find(kKeys[i].data(), &val);
+ EXPECT_TRUE(found) << kKeys[i];
+ EXPECT_TRUE(old_to_new_tvi.find(val) != old_to_new_tvi.end());
+ }
+}
+
+} // namespace
+
+// The tests below are accessing private methods and fields of IcingDynamicTrie
+// so can't be in the anonymous namespace.
+
+TEST_F(IcingDynamicTrieTest, TrieShouldRespectLimits) {
+ // Test limits on numbers of nodes, nexts, and suffixes size.
+ IcingFilesystem filesystem;
+
+ // These 3 numbers are the entities we need in order to insert all the test
+ // words before the last one.
+ uint32_t num_nodes_enough;
+ uint32_t num_nexts_enough;
+ uint32_t suffixes_size_enough;
+
+ // First, try to fill the 3 numbers above.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ // Creates a trie with enough numbers of nodes, nexts, and suffix file size.
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ /*max_nodes_in=*/1000, /*max_nexts_in=*/1000,
+ /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value));
+ }
+
+ IcingDynamicTrieHeader header;
+ trie.GetHeader(&header);
+
+ // Before each insertion, it requires that there're (2 + 1 + key_length)
+ // nodes left, so we need 8 nodes to insert the last word. +7 here will make
+ // it just enough to insert the word before the last one.
+ num_nodes_enough = header.num_nodes() + 7;
+
+ // Before each insertion, it requires that there're (2 + 1 + key_length +
+ // kMaxNextArraySize) nexts left, so we need (8 + kMaxNextArraySize) nexts
+ // to insert the last word. (7 + kMaxNextArraySize) here will make it just
+ // enough to insert the word before the last one.
+ num_nexts_enough =
+ header.num_nexts() + 7 + IcingDynamicTrie::kMaxNextArraySize;
+
+ // Before each insertion, it requires that there're (1 + key_length +
+ // value_size) bytes left for suffixes, so we need (6 + sizeof(uint32_t))
+ // bytes to insert the last word. (5 + sizeof(uint32_t)) here will make it
+ // just enough to insert the word before the last one.
+ suffixes_size_enough = header.suffixes_size() + 5 + sizeof(uint32_t);
+ }
+
+ // Test a trie with just enough number of nodes.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ num_nodes_enough, /*max_nexts_in=*/1000,
+ /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value));
+ }
+
+ // Fails to insert the last word because no enough nodes left.
+ EXPECT_FALSE(trie.Insert(
+ kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(), &value));
+ }
+
+ // Test a trie with just enough number of nexts.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ /*max_nodes_in=*/1000, num_nexts_enough,
+ /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value));
+ }
+
+ // Fails to insert the last word because no enough nexts left.
+ EXPECT_FALSE(trie.Insert(
+ kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(), &value));
+ }
+
+ // Test a trie with just enough suffixes size.
+ {
+ IcingDynamicTrie trie(trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions(), &filesystem);
+ ASSERT_TRUE(trie.Remove());
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
+ /*max_nodes_in=*/1000, /*max_nexts_in=*/1000, suffixes_size_enough,
+ sizeof(uint32_t))));
+ ASSERT_TRUE(trie.Init());
+
+ // Inserts all the test words before the last one.
+ uint32_t value = 0;
+ for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
+ ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value));
+ }
+
+ // Fails to insert the last word because no enough space for more suffixes.
+ EXPECT_FALSE(trie.Insert(
+ kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(), &value));
+ }
+}
+
+TEST_F(IcingDynamicTrieTest, SyncErrorRecovery) {
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+
+ static const uint32_t kNumKeys = 5000;
+ AddToTrie(&trie, kNumKeys);
+ CheckTrie(trie, kNumKeys);
+
+ trie.Sync();
+ trie.Close();
+
+ // Reach into the file and set the value_size.
+ ASSERT_TRUE(trie.Init());
+ IcingDynamicTrieHeader hdr;
+ trie.GetHeader(&hdr);
+ hdr.set_value_size(hdr.value_size() + 123);
+ trie.SetHeader(hdr);
+ trie.Close();
+
+ ASSERT_FALSE(trie.Init());
+}
+
+TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) {
+ // Create trie with one property.
+ IcingFilesystem filesystem;
+ IcingDynamicTrie trie(
+ trie_files_prefix_,
+ IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
+ &filesystem);
+ ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
+ ASSERT_TRUE(trie.Init());
+ ASSERT_TRUE(trie.deleted_bitmap_);
+ trie.SetProperty(0, 0);
+ ASSERT_EQ(1, trie.property_bitmaps_.size());
+ ASSERT_TRUE(trie.property_bitmaps_[0]);
+ trie.Close();
+
+ // Intentionally corrupt deleted_bitmap file to make Init() fail.
+ FILE* fp = fopen(trie.deleted_bitmap_filename_.c_str(), "r+");
+ ASSERT_TRUE(fp);
+ ASSERT_EQ(16, fwrite("################", 1, 16, fp));
+ fclose(fp);
+ ASSERT_FALSE(trie.Init());
+
+ // Check that both the bitmap and the property files have been closed.
+ ASSERT_FALSE(trie.deleted_bitmap_);
+ ASSERT_EQ(0, trie.property_bitmaps_.size());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-filesystem.h b/icing/legacy/index/icing-filesystem.h
index 2b10c1c..f645632 100644
--- a/icing/legacy/index/icing-filesystem.h
+++ b/icing/legacy/index/icing-filesystem.h
@@ -17,13 +17,15 @@
#ifndef ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
#define ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
+#include <sys/types.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
#include <memory>
#include <string>
#include <unordered_set>
+#include <utility>
#include <vector>
namespace icing {
diff --git a/icing/legacy/index/icing-mmapper.h b/icing/legacy/index/icing-mmapper.h
index bf62aa5..d054c11 100644
--- a/icing/legacy/index/icing-mmapper.h
+++ b/icing/legacy/index/icing-mmapper.h
@@ -22,9 +22,11 @@
#ifndef ICING_LEGACY_INDEX_ICING_MMAPPER_H_
#define ICING_LEGACY_INDEX_ICING_MMAPPER_H_
-#include <stdint.h>
#include <unistd.h>
+#include <cstddef>
+#include <cstdint>
+
namespace icing {
namespace lib {
diff --git a/icing/legacy/index/icing-storage.h b/icing/legacy/index/icing-storage.h
index cc06c54..58b6aa1 100644
--- a/icing/legacy/index/icing-storage.h
+++ b/icing/legacy/index/icing-storage.h
@@ -20,6 +20,7 @@
#ifndef ICING_LEGACY_INDEX_ICING_STORAGE_H_
#define ICING_LEGACY_INDEX_ICING_STORAGE_H_
+#include <cstdint>
#include <string>
namespace icing {
diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc
new file mode 100644
index 0000000..0e2eb2a
--- /dev/null
+++ b/icing/testing/hit-test-utils.cc
@@ -0,0 +1,58 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/hit-test-utils.h"
+
+namespace icing {
+namespace lib {
+
+// Returns a hit that has a delta of desired_byte_length from last_hit.
+Hit CreateHit(Hit last_hit, int desired_byte_length) {
+ Hit hit =
+ (last_hit.section_id() == kMinSectionId)
+ ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score())
+ : Hit(last_hit.section_id() - 1, last_hit.document_id(),
+ last_hit.score());
+ uint8_t buf[5];
+ while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
+ desired_byte_length) {
+ hit = (hit.section_id() == kMinSectionId)
+ ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score())
+ : Hit(hit.section_id() - 1, hit.document_id(), hit.score());
+ }
+ return hit;
+}
+
+// Returns a vector of num_hits Hits with the first hit starting at start_docid
+// and with 1-byte deltas.
+std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
+ int desired_byte_length) {
+ std::vector<Hit> hits;
+ if (num_hits < 1) {
+ return hits;
+ }
+ hits.push_back(
+ Hit(/*section_id=*/1, /*document_id=*/start_docid, Hit::kMaxHitScore));
+ while (hits.size() < num_hits) {
+ hits.push_back(CreateHit(hits.back(), desired_byte_length));
+ }
+ return hits;
+}
+
+std::vector<Hit> CreateHits(int num_hits, int desired_byte_length) {
+ return CreateHits(/*start_docid=*/0, num_hits, desired_byte_length);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/hit-test-utils.h b/icing/testing/hit-test-utils.h
new file mode 100644
index 0000000..e236ec0
--- /dev/null
+++ b/icing/testing/hit-test-utils.h
@@ -0,0 +1,43 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_HIT_TEST_UTILS_H_
+#define ICING_TESTING_HIT_TEST_UTILS_H_
+
+#include <vector>
+
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Returns a hit that has a delta of desired_byte_length from last_hit.
+Hit CreateHit(Hit last_hit, int desired_byte_length);
+
+// Returns a vector of num_hits Hits with the first hit starting at start_docid
+// and with desired_byte_length deltas.
+std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
+ int desired_byte_length);
+
+// Returns a vector of num_hits Hits with the first hit starting at 0 and each
+// with desired_byte_length deltas.
+std::vector<Hit> CreateHits(int num_hits, int desired_byte_length);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_HIT_TEST_UTILS_H_
diff --git a/icing/text_classifier/lib3/utils/base/logging.h b/icing/text_classifier/lib3/utils/base/logging.h
index bf02f65..92d775e 100644
--- a/icing/text_classifier/lib3/utils/base/logging.h
+++ b/icing/text_classifier/lib3/utils/base/logging.h
@@ -22,7 +22,6 @@
#include "icing/text_classifier/lib3/utils/base/logging_levels.h"
#include "icing/text_classifier/lib3/utils/base/port.h"
-
namespace libtextclassifier3 {
namespace logging {
diff --git a/icing/text_classifier/lib3/utils/base/statusor.h b/icing/text_classifier/lib3/utils/base/statusor.h
index f5fae7a..9ec3d91 100644
--- a/icing/text_classifier/lib3/utils/base/statusor.h
+++ b/icing/text_classifier/lib3/utils/base/statusor.h
@@ -86,6 +86,8 @@ class StatusOr {
// Conversion assignment operator, T must be assignable from U
template <typename U>
inline StatusOr& operator=(const StatusOr<U>& other);
+ template <typename U>
+ inline StatusOr& operator=(StatusOr<U>&& other);
inline ~StatusOr();
@@ -134,6 +136,40 @@ class StatusOr {
friend class StatusOr;
private:
+ void Clear() {
+ if (ok()) {
+ value_.~T();
+ }
+ }
+
+ // Construct the value through placement new with the passed argument.
+ template <typename... Arg>
+ void MakeValue(Arg&&... arg) {
+ new (&value_) T(std::forward<Arg>(arg)...);
+ }
+
+ // Creates a valid instance of type T constructed with U and assigns it to
+ // value_. Handles how to properly assign to value_ if value_ was never
+ // actually initialized (if this is currently non-OK).
+ template <typename U>
+ void AssignValue(U&& value) {
+ if (ok()) {
+ value_ = std::forward<U>(value);
+ } else {
+ MakeValue(std::forward<U>(value));
+ status_ = Status::OK;
+ }
+ }
+
+ // Creates a status constructed with U and assigns it to status_. It also
+ // properly destroys value_ if this is OK and value_ represents a valid
+ // instance of T.
+ template <typename U>
+ void AssignStatus(U&& v) {
+ Clear();
+ status_ = static_cast<Status>(std::forward<U>(v));
+ }
+
Status status_;
// The members of unions do not require initialization and are not destructed
// unless specifically called. This allows us to construct instances of
@@ -210,35 +246,47 @@ inline StatusOr<T>::StatusOr(U&& value) : StatusOr(T(std::forward<U>(value))) {}
template <typename T>
inline StatusOr<T>& StatusOr<T>::operator=(const StatusOr& other) {
- status_ = other.status_;
- if (status_.ok()) {
- value_ = other.value_;
+ if (other.ok()) {
+ AssignValue(other.value_);
+ } else {
+ AssignStatus(other.status_);
}
return *this;
}
template <typename T>
inline StatusOr<T>& StatusOr<T>::operator=(StatusOr&& other) {
- status_ = other.status_;
- if (status_.ok()) {
- value_ = std::move(other.value_);
+ if (other.ok()) {
+ AssignValue(std::move(other.value_));
+ } else {
+ AssignStatus(std::move(other.status_));
}
return *this;
}
template <typename T>
inline StatusOr<T>::~StatusOr() {
- if (ok()) {
- value_.~T();
- }
+ Clear();
}
template <typename T>
template <typename U>
inline StatusOr<T>& StatusOr<T>::operator=(const StatusOr<U>& other) {
- status_ = other.status_;
- if (status_.ok()) {
- value_ = other.value_;
+ if (other.ok()) {
+ AssignValue(other.value_);
+ } else {
+ AssignStatus(other.status_);
+ }
+ return *this;
+}
+
+template <typename T>
+template <typename U>
+inline StatusOr<T>& StatusOr<T>::operator=(StatusOr<U>&& other) {
+ if (other.ok()) {
+ AssignValue(std::move(other.value_));
+ } else {
+ AssignStatus(std::move(other.status_));
}
return *this;
}