diff options
author | Cassie Wang <cassiewang@google.com> | 2020-07-21 14:26:01 -0700 |
---|---|---|
committer | Cassie Wang <cassiewang@google.com> | 2020-07-24 14:19:26 -0700 |
commit | c994b6ea30c9be8976da0b1bf6a8923907ff903f (patch) | |
tree | f69970caf14ff26cfea1f3a573c2c27a7cd83e3d /icing | |
parent | 09d66401215254a2bdfc134009039636054d28d2 (diff) | |
download | icing-c994b6ea30c9be8976da0b1bf6a8923907ff903f.tar.gz |
Pull upstream changes.
Change-Id: Ieed20fd00a7c00778045434ae1b7c9e019a6c369
Diffstat (limited to 'icing')
29 files changed, 2395 insertions, 244 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index 27d03b2..e4ec0cd 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -181,7 +181,9 @@ class FileBackedVector { // OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size libtextclassifier3::Status Set(int32_t idx, const T& value); - // Resizes to first len elements. The crc is not updated on truncation. + // Resizes to first len elements. The crc is cleared on truncation and will be + // updated on destruction, or once the client calls ComputeChecksum() or + // PersistToDisk(). // // Returns: // OUT_OF_RANGE_ERROR if len < 0 or >= num_elements() @@ -577,6 +579,13 @@ libtextclassifier3::Status FileBackedVector<T>::TruncateTo( new_num_elements, header_->num_elements)); } + ICING_VLOG(2) + << "FileBackedVector truncating, need to recalculate entire checksum"; + changes_.clear(); + saved_original_buffer_.clear(); + changes_end_ = 0; + header_->vector_checksum = 0; + header_->num_elements = new_num_elements; return libtextclassifier3::Status::OK; } @@ -653,6 +662,7 @@ libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() { } cur_offset += sizeof(T); } + if (!changes_.empty()) { ICING_VLOG(2) << IcingStringUtil::StringPrintf( "Array update partial crcs %d truncated %d overlapped %d duplicate %d", diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index 7561b57..6c3b931 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -158,8 +158,8 @@ TEST_F(FileBackedVectorTest, SimpleShared) { // Truncate the content ICING_EXPECT_OK(vector->TruncateTo(0)); - // We don't automatically update the crc when we truncate. - EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc)); + // Crc is cleared after truncation and reset to 0. + EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0))); EXPECT_EQ(0u, vector->num_elements()); } @@ -409,10 +409,10 @@ TEST_F(FileBackedVectorTest, TruncateTo) { EXPECT_EQ(1, vector->num_elements()); EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534))); - // Truncating doesn't cause the checksum to be updated. + // Truncating clears the checksum and resets it to 0 ICING_EXPECT_OK(vector->TruncateTo(0)); EXPECT_EQ(0, vector->num_elements()); - EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534))); + EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0))); // Can't truncate past end. EXPECT_THAT(vector->TruncateTo(100), @@ -423,6 +423,46 @@ TEST_F(FileBackedVectorTest, TruncateTo) { StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); } +TEST_F(FileBackedVectorTest, TruncateAndReReadFile) { + { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<float>> vector, + FileBackedVector<float>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + ICING_ASSERT_OK(vector->Set(0, 1.0)); + ICING_ASSERT_OK(vector->Set(1, 2.0)); + ICING_ASSERT_OK(vector->Set(2, 2.0)); + ICING_ASSERT_OK(vector->Set(3, 2.0)); + ICING_ASSERT_OK(vector->Set(4, 2.0)); + } // Destroying the vector should trigger a checksum of the 5 elements + + { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<float>> vector, + FileBackedVector<float>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + EXPECT_EQ(5, vector->num_elements()); + ICING_EXPECT_OK(vector->TruncateTo(4)); + EXPECT_EQ(4, vector->num_elements()); + } // Destroying the vector should update the checksum to 4 elements + + // Creating again should double check that our checksum of 4 elements matches + // what was previously saved. + { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<float>> vector, + FileBackedVector<float>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + EXPECT_EQ(vector->num_elements(), 4); + } +} + } // namespace } // namespace lib diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc index 34365a9..bda01f2 100644 --- a/icing/file/memory-mapped-file.cc +++ b/icing/file/memory-mapped-file.cc @@ -37,6 +37,23 @@ MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem, file_path_(file_path), strategy_(mmap_strategy) {} +MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other) + // Make sure that mmap_result_ is a nullptr before we call Swap. We don't + // care what values the remaining members hold before we swap into other, + // but if mmap_result_ holds a non-NULL value before we initialized anything + // then other will try to free memory at that address when it's destroyed! + : mmap_result_(nullptr) { + Swap(&other); +} + +MemoryMappedFile& MemoryMappedFile::operator=(MemoryMappedFile&& other) { + // Swap all of our elements with other. This will ensure that both this now + // holds other's previous resources and that this's previous resources will be + // properly freed when other is destructed at the end of this function. + Swap(&other); + return *this; +} + MemoryMappedFile::~MemoryMappedFile() { Unmap(); } void MemoryMappedFile::MemoryMappedFile::Unmap() { @@ -167,5 +184,16 @@ libtextclassifier3::Status MemoryMappedFile::OptimizeFor( return libtextclassifier3::Status::OK; } +void MemoryMappedFile::Swap(MemoryMappedFile* other) { + std::swap(filesystem_, other->filesystem_); + std::swap(file_path_, other->file_path_); + std::swap(strategy_, other->strategy_); + std::swap(file_offset_, other->file_offset_); + std::swap(region_, other->region_); + std::swap(region_size_, other->region_size_); + std::swap(adjusted_mmap_size_, other->adjusted_mmap_size_); + std::swap(mmap_result_, other->mmap_result_); +} + } // namespace lib } // namespace icing diff --git a/icing/file/memory-mapped-file.h b/icing/file/memory-mapped-file.h index 5a52368..1d31a42 100644 --- a/icing/file/memory-mapped-file.h +++ b/icing/file/memory-mapped-file.h @@ -79,7 +79,10 @@ class MemoryMappedFile { // file_path : Full path of the file that needs to be memory-mapped. MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path, Strategy mmap_strategy); - + MemoryMappedFile(const MemoryMappedFile& other) = delete; + MemoryMappedFile(MemoryMappedFile&& other); + MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete; + MemoryMappedFile& operator=(MemoryMappedFile&& other); // Frees any region that is still memory-mapped region. ~MemoryMappedFile(); @@ -134,10 +137,13 @@ class MemoryMappedFile { Strategy strategy() const { return strategy_; } private: + // Swaps the contents of this with other. + void Swap(MemoryMappedFile* other); + // Cached constructor params. - const Filesystem* const filesystem_; - const std::string file_path_; - const Strategy strategy_; + const Filesystem* filesystem_; + std::string file_path_; + Strategy strategy_; // Offset within the file at which the current memory-mapped region starts. size_t file_offset_ = 0; diff --git a/icing/index/index.cc b/icing/index/index.cc index 927acaf..d4a2508 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -26,7 +26,7 @@ #include "icing/index/hit/hit.h" #include "icing/index/iterator/doc-hit-info-iterator-term.h" #include "icing/index/iterator/doc-hit-info-iterator.h" -#include "icing/index/lite-index.h" +#include "icing/index/lite/lite-index.h" #include "icing/index/term-id-codec.h" #include "icing/index/term-property-id.h" #include "icing/legacy/core/icing-string-util.h" diff --git a/icing/index/index.h b/icing/index/index.h index f30c8ad..d8e409c 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -25,7 +25,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/hit/hit.h" #include "icing/index/iterator/doc-hit-info-iterator.h" -#include "icing/index/lite-index.h" +#include "icing/index/lite/lite-index.h" #include "icing/index/term-id-codec.h" #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-filesystem.h" diff --git a/icing/index/iterator/doc-hit-info-iterator-term.h b/icing/index/iterator/doc-hit-info-iterator-term.h index 7d02fc2..21d1dd6 100644 --- a/icing/index/iterator/doc-hit-info-iterator-term.h +++ b/icing/index/iterator/doc-hit-info-iterator-term.h @@ -21,7 +21,7 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" -#include "icing/index/lite-index.h" +#include "icing/index/lite/lite-index.h" #include "icing/index/term-id-codec.h" #include "icing/schema/section.h" diff --git a/icing/index/lite-index.cc b/icing/index/lite/lite-index.cc index 489c53d..a72402e 100644 --- a/icing/index/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/index/lite-index.h" +#include "icing/index/lite/lite-index.h" #include <inttypes.h> #include <stddef.h> diff --git a/icing/index/lite-index.h b/icing/index/lite/lite-index.h index b60a947..b60a947 100644 --- a/icing/index/lite-index.h +++ b/icing/index/lite/lite-index.h diff --git a/icing/index/main/index-block.cc b/icing/index/main/index-block.cc new file mode 100644 index 0000000..9d7df3c --- /dev/null +++ b/icing/index/main/index-block.cc @@ -0,0 +1,201 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/index/main/index-block.h" + +#include <inttypes.h> + +#include <algorithm> +#include <limits> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/index/main/posting-list-free.h" +#include "icing/index/main/posting-list-utils.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/util/math-util.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +namespace { + +libtextclassifier3::Status ValidatePostingListBytes(uint32_t posting_list_bytes, + uint32_t block_size) { + if (posting_list_bytes > + IndexBlock::CalculateMaxPostingListBytes(block_size) || + !posting_list_utils::IsValidPostingListSize(posting_list_bytes)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Requested posting list size %d is illegal for a flash block with max " + "posting list size of %d", + posting_list_bytes, + IndexBlock::CalculateMaxPostingListBytes(block_size))); + } + return libtextclassifier3::Status::OK; +} + +} // namespace + +uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock( + uint32_t block_size, int posting_list_index_bits) { + // Assume 50% compressed and most don't have scores. + uint32_t bytes_per_hit = sizeof(Hit::Value) / 2; + return (block_size - sizeof(BlockHeader)) / + ((1u << posting_list_index_bits) * bytes_per_hit); +} + +libtextclassifier3::StatusOr<IndexBlock> +IndexBlock::CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem, + std::string_view file_path, + off_t offset, + uint32_t block_size) { + if (block_size < sizeof(BlockHeader)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Provided block_size %d is too small to fit even the BlockHeader!", + block_size)); + } + MemoryMappedFile mmapped_file( + filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); + ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size)); + IndexBlock block(std::move(mmapped_file)); + ICING_RETURN_IF_ERROR( + ValidatePostingListBytes(block.get_posting_list_bytes(), block_size)); + return block; +} + +libtextclassifier3::StatusOr<IndexBlock> +IndexBlock::CreateFromUninitializedRegion(const Filesystem& filesystem, + std::string_view file_path, + off_t offset, uint32_t block_size, + uint32_t posting_list_bytes) { + if (block_size < sizeof(BlockHeader)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Provided block_size %d is too small to fit even the BlockHeader!", + block_size)); + } + ICING_RETURN_IF_ERROR( + ValidatePostingListBytes(posting_list_bytes, block_size)); + MemoryMappedFile mmapped_file( + filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); + ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size)); + IndexBlock block(std::move(mmapped_file)); + // Safe to ignore the return value of Reset. Reset returns an error if + // posting_list_bytes is invalid, but this function ensures that + // posting_list_bytes is valid thanks to the call to ValidatePostingListBytes + // above. + block.Reset(posting_list_bytes); + return block; +} + +IndexBlock::IndexBlock(MemoryMappedFile mmapped_block) + : header_(reinterpret_cast<BlockHeader*>(mmapped_block.mutable_region())), + posting_lists_start_ptr_(mmapped_block.mutable_region() + + sizeof(BlockHeader)), + block_size_in_bytes_(mmapped_block.region_size()), + mmapped_block_(std::move(mmapped_block)) {} + +libtextclassifier3::Status IndexBlock::Reset(int posting_list_bytes) { + ICING_RETURN_IF_ERROR(ValidatePostingListBytes(posting_list_bytes, + mmapped_block_.region_size())); + header_->free_list_posting_list_index = kInvalidPostingListIndex; + header_->next_block_index = kInvalidBlockIndex; + header_->posting_list_bytes = posting_list_bytes; + + // Starting with the last posting list, prepend each posting list to the free + // list. At the end, the beginning of the free list should be the first + // posting list. + for (PostingListIndex posting_list_index = max_num_posting_lists() - 1; + posting_list_index >= 0; --posting_list_index) { + // Adding the posting list at posting_list_index to the free list will + // modify both the posting list and also + // header_->free_list_posting_list_index. + FreePostingList(posting_list_index); + } + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::StatusOr<PostingListUsed> +IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) { + if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Cannot get posting list with index %d in IndexBlock with only %d " + "posting lists.", + posting_list_index, max_num_posting_lists())); + } + return PostingListUsed::CreateFromPreexistingPostingListUsedRegion( + get_posting_list_ptr(posting_list_index), get_posting_list_bytes()); +} + +libtextclassifier3::StatusOr<PostingListIndex> +IndexBlock::AllocatePostingList() { + if (!has_free_posting_lists()) { + return absl_ports::ResourceExhaustedError( + "No available posting lists to allocate."); + } + + // Pull one off the free list. + PostingListIndex posting_list_index = header_->free_list_posting_list_index; + + // We know at this point that posting_list_bytes will return a valid pl size + // (because an already initialized IndexBlock instance can't have an invalid + // posting_list_bytes). So CreateFromPreexistingPostingListFreeRegion will + // always return OK and ValueOrDie is safe to call. + auto posting_list_or = + PostingListFree::CreateFromPreexistingPostingListFreeRegion( + get_posting_list_ptr(posting_list_index), get_posting_list_bytes()); + PostingListFree plfree = std::move(posting_list_or).ValueOrDie(); + + header_->free_list_posting_list_index = plfree.get_next_posting_list_index(); + if (header_->free_list_posting_list_index != kInvalidPostingListIndex && + header_->free_list_posting_list_index >= max_num_posting_lists()) { + ICING_LOG(ERROR) + << "Free Posting List points to an invalid posting list index!"; + header_->free_list_posting_list_index = kInvalidPostingListIndex; + } + + // Make it a used posting list. + PostingListUsed::CreateFromUnitializedRegion( + get_posting_list_ptr(posting_list_index), get_posting_list_bytes()); + return posting_list_index; +} + +void IndexBlock::FreePostingList(PostingListIndex posting_list_index) { + if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) { + ICING_LOG(ERROR) << "Cannot free posting list with index " + << posting_list_index << " in IndexBlock with only " + << max_num_posting_lists() << " posting lists."; + return; + } + + // We know at this point that posting_list_bytes will return a valid pl size. + // So CreateFromUninitializedRegion will always return OK and ValueOrDie is + // safe to call. + auto posting_list_or = PostingListFree::CreateFromUnitializedRegion( + get_posting_list_ptr(posting_list_index), get_posting_list_bytes()); + PostingListFree plfree = std::move(posting_list_or).ValueOrDie(); + + // Put at the head of the list. + plfree.set_next_posting_list_index(header_->free_list_posting_list_index); + header_->free_list_posting_list_index = posting_list_index; +} + +char* IndexBlock::get_posting_list_ptr(PostingListIndex posting_list_index) { + return posting_lists_start_ptr_ + + get_posting_list_bytes() * posting_list_index; +} + +} // namespace lib +} // namespace icing diff --git a/icing/index/main/index-block.h b/icing/index/main/index-block.h new file mode 100644 index 0000000..1d17e34 --- /dev/null +++ b/icing/index/main/index-block.h @@ -0,0 +1,215 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_INDEX_MAIN_INDEX_BLOCK_H_ +#define ICING_INDEX_MAIN_INDEX_BLOCK_H_ + +#include <string.h> +#include <sys/mman.h> + +#include <algorithm> +#include <limits> +#include <string> +#include <unordered_set> +#include <vector> + +#include "icing/file/memory-mapped-file.h" +#include "icing/index/hit/hit.h" +#include "icing/index/main/posting-list-free.h" +#include "icing/index/main/posting-list-used.h" +#include "icing/legacy/index/icing-bit-util.h" + +namespace icing { +namespace lib { + +inline constexpr uint32_t kInvalidBlockIndex = 0; + +// This class is used to manage I/O to a single flash block and to manage the +// division of that flash block into PostingLists. It provides an interface to +// allocate, free and read posting lists. +// +// An IndexBlock contains a small header and an array of fixed-size posting list +// buffers. Initially, all posting lists are chained in a singly-linked free +// list. +// +// When we want to get a new PostingList from an IndexBlock, we just +// pull one off the free list. When the user wants to return the +// PostingList to the free pool, we prepend it to the free list. +class IndexBlock { + public: + // What is the maximum posting list size in bytes that can be stored in this + // block. + static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes) { + return (block_size_in_bytes - sizeof(BlockHeader)) / sizeof(Hit) * + sizeof(Hit); + } + + // For a given min number of bits needed to store PostingListIndex for a + // block of "block_size", return the approximate number of hits that a full + // posting list in this block could accomodate. + static uint32_t ApproximateFullPostingListHitsForBlock( + uint32_t block_size, int posting_list_index_bits); + + // Create an IndexBlock to reference the previously used region of the + // mmapped_file starting at offset with size block_size + // + // RETURNS: + // - a valid IndexBlock on success + // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or + // if the posting list size stored in the region is not a valid posting + // list size or it exceeds max_posting_list_bytes(size). + // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size) + static libtextclassifier3::StatusOr<IndexBlock> + CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem, + std::string_view file_path, + off_t offset, uint32_t block_size); + + // Create an IndexBlock to reference an uninitialized region of the + // mmapped_file starting at offset with size block_size. The IndexBlock will + // initialize the region to be an empty IndexBlock with posting lists of size + // posting_list_bytes. + // + // RETURNS: + // - a valid IndexBlock on success + // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or + // if posting_list_bytes is not a valid posting list size or it exceeds + // max_posting_list_bytes(size). + // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size) + static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion( + const Filesystem& filesystem, std::string_view file_path, off_t offset, + uint32_t block_size, uint32_t posting_list_bytes); + + IndexBlock(const IndexBlock&) = delete; + IndexBlock& operator=(const IndexBlock&) = delete; + IndexBlock(IndexBlock&&) = default; + IndexBlock& operator=(IndexBlock&&) = default; + + // Instantiate a PostingListUsed at posting_list_index with the existing + // content in the IndexBlock. + // + // RETURNS: + // - a valid PostingListUsed on success + // - INVALID_ARGUMENT if posting_list_index >= max_num_posting_lists() + libtextclassifier3::StatusOr<PostingListUsed> GetAllocatedPostingList( + PostingListIndex posting_list_index); + + // Allocates a PostingListUsed in the IndexBlock, if possible. + // + // RETURNS: + // - a valid PostingListIndex that can be used to retrieve the allocated + // PostingListUsed via a call to GetAllocatedPostingList + // - RESOURCE_EXHAUSTED if !has_free_posting_lists() + libtextclassifier3::StatusOr<PostingListIndex> AllocatePostingList(); + + // Free posting list at posting_list_index. + // + // It is considered an error to "double-free" a posting list. You should never + // call FreePostingList(index) with the same index twice, unless that index + // was returned by an intervening AllocatePostingList() call. + // + // Ex. + // PostingListIndex index = block.AllocatePostingList(); + // DoSomething(block.GetAllocatedPostingList(index)); + // block.FreePostingList(index); + // block.FreePostingList(index); // Argh! What are you doing?! + // ... + // PostingListIndex index = block.AllocatePostingList(); + // DoSomething(block.GetAllocatedPostingList(index)); + // block.FreePostingList(index); + // index = block.AllocatePostingList(); + // DoSomethingElse(block.GetAllocatedPostingList(index)); + // // A-Ok! We called AllocatePostingList() since the last FreePostingList() + // call. block.FreePostingList(index); + // + // Has no effect if posting_list_index >= max_num_posting_lists(). + void FreePostingList(PostingListIndex posting_list_index); + + // Blocks can be chained. The interpretation of the chaining is up + // to the caller. + uint32_t next_block_index() const { return header_->next_block_index; } + + void set_next_block_index(uint32_t next_block_index) { + header_->next_block_index = next_block_index; + } + + // Retrieves the size (in bytes) of the posting lists in this IndexBlock. + uint32_t get_posting_list_bytes() const { + return header_->posting_list_bytes; + } + + // Maximum number of posting lists in the block. + uint32_t max_num_posting_lists() const { + return total_posting_lists_bytes() / get_posting_list_bytes(); + } + + // Number of bits required to store the largest PostingListIndex in this + // block. + int posting_list_index_bits() const { + return BitsToStore(max_num_posting_lists()); + } + + // Returns whether or not there are available posting lists in the free list. + bool has_free_posting_lists() const { + return header_->free_list_posting_list_index != kInvalidPostingListIndex; + } + + private: + // Assumes that mmapped_file already has established a valid mapping to the + // requested block. + IndexBlock(MemoryMappedFile mmapped_block); + + // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds + // all posting lists to the free list. + // + // RETURNS: + // - OK, on success + // - INVALID_ARGUMENT if posting_list_bytes is a valid posting list size. + libtextclassifier3::Status Reset(int posting_list_bytes); + + char* get_posting_list_ptr(PostingListIndex posting_list_index); + + // Bytes in the block available for posting lists (minus header, + // alignment, etc.). + uint32_t total_posting_lists_bytes() const { + return block_size_in_bytes_ - sizeof(BlockHeader); + } + + struct BlockHeader { + // Index of the next block if this block is being chained or part of a free + // list. + uint32_t next_block_index; + + // Index to the first PostingListFree in the IndexBlock. This is the start + // of the free list. + PostingListIndex free_list_posting_list_index; + + // The size of each posting list in the IndexBlock. + uint32_t posting_list_bytes; + }; + // Pointer to the header of this block. The header is used to store info about + // this block and its posting lists. + BlockHeader* header_; + // Pointer to the beginning of the posting lists region - the area the block + // after the header. + char* posting_lists_start_ptr_; + uint32_t block_size_in_bytes_; + + // MemoryMappedFile used to interact with the underlying flash block. + MemoryMappedFile mmapped_block_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_INDEX_MAIN_INDEX_BLOCK_H_ diff --git a/icing/index/main/index-block_test.cc b/icing/index/main/index-block_test.cc new file mode 100644 index 0000000..08ba57d --- /dev/null +++ b/icing/index/main/index-block_test.cc @@ -0,0 +1,354 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/index/main/index-block.h" + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/index/main/posting-list-used.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +static constexpr int kBlockSize = 4096; + +bool CreateFileWithSize(const Filesystem& filesystem, const std::string& file, + int size) { + size_t parent_dir_end = file.find_last_of('/'); + if (parent_dir_end == std::string::npos) { + return false; + } + std::string file_dir = file.substr(0, parent_dir_end); + return filesystem.CreateDirectoryRecursively(file_dir.c_str()) && + filesystem.Grow(file.c_str(), size); +} + +using ::testing::ElementsAreArray; +using ::testing::Eq; + +TEST(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) { + constexpr int kPostingListBytes = 20; + + Filesystem filesystem; + std::string flash_file = GetTestTempDir() + "/flash/0"; + // Grow the file by one block for the IndexBlock to use. + ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); + + { + // Create an IndexBlock from this newly allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize, + kPostingListBytes)); + EXPECT_TRUE(block.has_free_posting_lists()); + } +} + +TEST(IndexBlockTest, SizeAccessorsWorkCorrectly) { + constexpr int kPostingListBytes1 = 20; + + Filesystem filesystem; + std::string flash_file = GetTestTempDir() + "/flash/0"; + // Grow the file by one block for the IndexBlock to use. + ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); + + // Create an IndexBlock from this newly allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize, + kPostingListBytes1)); + EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes1)); + // There should be (4096 - 12) / 20 = 204 posting lists + // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 203 in only 8 + // bits. + EXPECT_THAT(block.max_num_posting_lists(), Eq(204)); + EXPECT_THAT(block.posting_list_index_bits(), Eq(8)); + + constexpr int kPostingListBytes2 = 200; + + // Create an IndexBlock from this newly allocated file block. + ICING_ASSERT_OK_AND_ASSIGN(block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, + kBlockSize, kPostingListBytes2)); + EXPECT_THAT(block.get_posting_list_bytes(), Eq(kPostingListBytes2)); + // There should be (4096 - 12) / 200 = 20 posting lists + // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5 + // bits. + EXPECT_THAT(block.max_num_posting_lists(), Eq(20)); + EXPECT_THAT(block.posting_list_index_bits(), Eq(5)); +} + +TEST(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) { + constexpr int kPostingListBytes = 2000; + + Filesystem filesystem; + std::string flash_file = GetTestTempDir() + "/flash/0"; + // Grow the file by one block for the IndexBlock to use. + ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); + + std::vector<Hit> test_hits{ + Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore), + Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore), + Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99), + Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17), + Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore), + }; + PostingListIndex allocated_index; + { + // Create an IndexBlock from this newly allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, + /*offset=*/0, + /*block_size=*/kBlockSize, kPostingListBytes)); + // Add hits to the first posting list. + ICING_ASSERT_OK_AND_ASSIGN(allocated_index, block.AllocatePostingList()); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used, + block.GetAllocatedPostingList(allocated_index)); + for (const Hit& hit : test_hits) { + ICING_ASSERT_OK(pl_used.PrependHit(hit)); + } + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray( + test_hits.rbegin(), test_hits.rend()))); + } + { + // Create an IndexBlock from the previously allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, + IndexBlock::CreateFromPreexistingIndexBlockRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize)); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used, + block.GetAllocatedPostingList(allocated_index)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray( + test_hits.rbegin(), test_hits.rend()))); + EXPECT_TRUE(block.has_free_posting_lists()); + } +} + +TEST(IndexBlockTest, IndexBlockMultiplePostingLists) { + constexpr int kPostingListBytes = 2000; + + Filesystem filesystem; + std::string flash_file = GetTestTempDir() + "/flash/0"; + // Grow the file by one block for the IndexBlock to use. + ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); + + std::vector<Hit> hits_in_posting_list1{ + Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore), + Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore), + Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99), + Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17), + Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore), + }; + std::vector<Hit> hits_in_posting_list2{ + Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88), + Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore), + Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2), + Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12), + Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore), + }; + PostingListIndex allocated_index_1; + PostingListIndex allocated_index_2; + { + // Create an IndexBlock from this newly allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize, + kPostingListBytes)); + + // Add hits to the first posting list. + ICING_ASSERT_OK_AND_ASSIGN(allocated_index_1, block.AllocatePostingList()); + ICING_ASSERT_OK_AND_ASSIGN( + PostingListUsed pl_used_1, + block.GetAllocatedPostingList(allocated_index_1)); + for (const Hit& hit : hits_in_posting_list1) { + ICING_ASSERT_OK(pl_used_1.PrependHit(hit)); + } + EXPECT_THAT(pl_used_1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(), + hits_in_posting_list1.rend()))); + + // Add hits to the second posting list. + ICING_ASSERT_OK_AND_ASSIGN(allocated_index_2, block.AllocatePostingList()); + ICING_ASSERT_OK_AND_ASSIGN( + PostingListUsed pl_used_2, + block.GetAllocatedPostingList(allocated_index_2)); + for (const Hit& hit : hits_in_posting_list2) { + ICING_ASSERT_OK(pl_used_2.PrependHit(hit)); + } + EXPECT_THAT(pl_used_2.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(), + hits_in_posting_list2.rend()))); + + EXPECT_THAT(block.AllocatePostingList(), + StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); + EXPECT_FALSE(block.has_free_posting_lists()); + } + { + // Create an IndexBlock from the previously allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, + IndexBlock::CreateFromPreexistingIndexBlockRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize)); + ICING_ASSERT_OK_AND_ASSIGN( + PostingListUsed pl_used_1, + block.GetAllocatedPostingList(allocated_index_1)); + EXPECT_THAT(pl_used_1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(), + hits_in_posting_list1.rend()))); + ICING_ASSERT_OK_AND_ASSIGN( + PostingListUsed pl_used_2, + block.GetAllocatedPostingList(allocated_index_2)); + EXPECT_THAT(pl_used_2.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(), + hits_in_posting_list2.rend()))); + EXPECT_THAT(block.AllocatePostingList(), + StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); + EXPECT_FALSE(block.has_free_posting_lists()); + } +} + +TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) { + constexpr int kPostingListBytes = 2000; + + Filesystem filesystem; + std::string flash_file = GetTestTempDir() + "/flash/0"; + // Grow the file by one block for the IndexBlock to use. + ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); + + // Create an IndexBlock from this newly allocated file block. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, + IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize, kPostingListBytes)); + + // Add hits to the first posting list. + std::vector<Hit> hits_in_posting_list1{ + Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore), + Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore), + Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99), + Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17), + Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore), + }; + ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1, + block.AllocatePostingList()); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_1, + block.GetAllocatedPostingList(allocated_index_1)); + for (const Hit& hit : hits_in_posting_list1) { + ICING_ASSERT_OK(pl_used_1.PrependHit(hit)); + } + EXPECT_THAT(pl_used_1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(), + hits_in_posting_list1.rend()))); + + // Add hits to the second posting list. + std::vector<Hit> hits_in_posting_list2{ + Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88), + Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore), + Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2), + Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12), + Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore), + }; + ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2, + block.AllocatePostingList()); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used_2, + block.GetAllocatedPostingList(allocated_index_2)); + for (const Hit& hit : hits_in_posting_list2) { + ICING_ASSERT_OK(pl_used_2.PrependHit(hit)); + } + EXPECT_THAT(pl_used_2.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(), + hits_in_posting_list2.rend()))); + + EXPECT_THAT(block.AllocatePostingList(), + StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); + EXPECT_FALSE(block.has_free_posting_lists()); + + // Now free the first posting list. Then, reallocate it and fill it with a + // different set of hits. + block.FreePostingList(allocated_index_1); + EXPECT_TRUE(block.has_free_posting_lists()); + + std::vector<Hit> hits_in_posting_list3{ + Hit(/*section_id=*/12, /*document_id=*/0, /*score=*/88), + Hit(/*section_id=*/17, /*document_id=*/1, Hit::kMaxHitScore), + Hit(/*section_id=*/0, /*document_id=*/2, /*score=*/2), + }; + ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3, + block.AllocatePostingList()); + EXPECT_THAT(allocated_index_3, Eq(allocated_index_1)); + ICING_ASSERT_OK_AND_ASSIGN(pl_used_1, + block.GetAllocatedPostingList(allocated_index_3)); + for (const Hit& hit : hits_in_posting_list3) { + ICING_ASSERT_OK(pl_used_1.PrependHit(hit)); + } + EXPECT_THAT(pl_used_1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits_in_posting_list3.rbegin(), + hits_in_posting_list3.rend()))); + EXPECT_THAT(block.AllocatePostingList(), + StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); + EXPECT_FALSE(block.has_free_posting_lists()); +} + +TEST(IndexBlockTest, IndexBlockNextBlockIndex) { + constexpr int kPostingListBytes = 2000; + constexpr int kSomeBlockIndex = 22; + + Filesystem filesystem; + std::string flash_file = GetTestTempDir() + "/flash/0"; + // Grow the file by one block for the IndexBlock to use. + ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); + + { + // Create an IndexBlock from this newly allocated file block and set the + // next block index. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize, + kPostingListBytes)); + EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex)); + block.set_next_block_index(kSomeBlockIndex); + EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex)); + } + { + // Create an IndexBlock from this previously allocated file block and make + // sure that next_block_index is still set properly. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, + IndexBlock::CreateFromPreexistingIndexBlockRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize)); + EXPECT_THAT(block.next_block_index(), Eq(kSomeBlockIndex)); + } + { + // Create an IndexBlock, treating this file block as uninitialized. This + // reset the next_block_index to kInvalidBlockIndex. + ICING_ASSERT_OK_AND_ASSIGN( + IndexBlock block, IndexBlock::CreateFromUninitializedRegion( + filesystem, flash_file, /*offset=*/0, kBlockSize, + kPostingListBytes)); + EXPECT_THAT(block.next_block_index(), Eq(kInvalidBlockIndex)); + } +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/index/posting-list-free.h b/icing/index/main/posting-list-free.h index a2eba82..4b27401 100644 --- a/icing/index/posting-list-free.h +++ b/icing/index/main/posting-list-free.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_INDEX_POSTING_LIST_FREE_H_ -#define ICING_INDEX_POSTING_LIST_FREE_H_ +#ifndef ICING_INDEX_MAIN_POSTING_LIST_FREE_H_ +#define ICING_INDEX_MAIN_POSTING_LIST_FREE_H_ #include <string.h> #include <sys/mman.h> @@ -23,7 +23,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/index/hit/hit.h" -#include "icing/index/posting-list-utils.h" +#include "icing/index/main/posting-list-utils.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" @@ -33,7 +33,7 @@ namespace lib { // A FlashIndexBlock can contain multiple posting lists. This specifies which // PostingList in the FlashIndexBlock we want to refer to. -using PostingListIndex = uint32_t; +using PostingListIndex = int32_t; inline constexpr PostingListIndex kInvalidPostingListIndex = ~0U; // A posting list in the index block's free list. @@ -126,4 +126,4 @@ class PostingListFree { } // namespace lib } // namespace icing -#endif // ICING_INDEX_POSTING_LIST_FREE_H_ +#endif // ICING_INDEX_MAIN_POSTING_LIST_FREE_H_ diff --git a/icing/index/posting-list-free_test.cc b/icing/index/main/posting-list-free_test.cc index 80b8957..a152934 100644 --- a/icing/index/posting-list-free_test.cc +++ b/icing/index/main/posting-list-free_test.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/index/posting-list-free.h" +#include "icing/index/main/posting-list-free.h" #include <cstdint> #include <memory> #include "icing/text_classifier/lib3/utils/base/status.h" #include "gtest/gtest.h" -#include "icing/index/posting-list-utils.h" +#include "icing/index/main/posting-list-utils.h" #include "icing/testing/common-matchers.h" namespace icing { diff --git a/icing/index/posting-list-used.cc b/icing/index/main/posting-list-used.cc index 708b13b..a439c45 100644 --- a/icing/index/posting-list-used.cc +++ b/icing/index/main/posting-list-used.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/index/posting-list-used.h" +#include "icing/index/main/posting-list-used.h" #include <algorithm> #include <cinttypes> @@ -20,7 +20,7 @@ #include <limits> #include "icing/absl_ports/canonical_errors.h" -#include "icing/index/posting-list-utils.h" +#include "icing/index/main/posting-list-utils.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/legacy/index/icing-bit-util.h" #include "icing/util/status-macros.h" @@ -57,7 +57,10 @@ PostingListUsed::CreateFromUnitializedRegion(void *posting_list_buffer, return posting_list_used; } -void PostingListUsed::Clear() { set_start_byte_offset(size_in_bytes_); } +void PostingListUsed::Clear() { + // Safe to ignore return value because size_in_bytes_ a valid argument. + set_start_byte_offset(size_in_bytes_); +} libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) { ICING_RETURN_ERROR_IF_NULL(other); @@ -71,7 +74,7 @@ libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) { return absl_ports::FailedPreconditionError( "This posting list is in an invalid state and can't be used!"); } - if (other->IsPostingListValid()) { + if (!other->IsPostingListValid()) { return absl_ports::InvalidArgumentError( "Cannot MoveFrom an invalid posting list!"); } @@ -82,7 +85,7 @@ libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) { while (other->full() || other->almost_full() || (size_in_bytes_ - posting_list_utils::kSpecialHitsSize < other->BytesUsed())) { - if (other->GetHitsInternal(/*limit=*/1, /*pop=*/true, &hits) != 1) { + if (!other->GetHitsInternal(/*limit=*/1, /*pop=*/true, &hits).ok()) { return absl_ports::AbortedError( "Unable to retrieve hits from other posting list."); } @@ -96,7 +99,7 @@ libtextclassifier3::Status PostingListUsed::MoveFrom(PostingListUsed *other) { // Because we popped all hits from other outside of the compressed area and we // guaranteed that other->BytesUsed is less than size_in_bytes_ - // kSpecialHitSize. This is guaranteed to be a valid byte offset for the - // NOT_FULL state. + // NOT_FULL state, so ignoring the value is safe. set_start_byte_offset(size_in_bytes_ - other->BytesUsed()); // Put back remaining hits. @@ -127,20 +130,22 @@ uint32_t PostingListUsed::GetPadEnd(uint32_t offset) const { return pad_end; } -void PostingListUsed::PadToEnd(uint32_t start, uint32_t end) { +bool PostingListUsed::PadToEnd(uint32_t start, uint32_t end) { if (end > size_in_bytes_) { ICING_LOG(ERROR) << "Cannot pad a region that ends after size!"; - return; + return false; } // In VarInt a value of 0 encodes to 0. memset(posting_list_buffer_ + start, 0, end - start); + return true; } libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull( const Hit &hit) { // Get delta between first hit and the new hit. Try to fit delta // in the padded area and put new hit at the special position 1. - Hit cur = get_special_hit(1); + // Calling ValueOrDie is safe here because 1 < kNumSpecialHits. + Hit cur = get_special_hit(1).ValueOrDie(); if (cur.value() <= hit.value()) { return absl_ports::InvalidArgumentError( "Hit being prepended must be strictly less than the most recent Hit"); @@ -164,12 +169,15 @@ libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull( uint8_t *score_offset = delta_offset + delta_len; memcpy(score_offset, &score, cur_score_bytes); - // Now first hit is the new hit, at special position 1. + // Now first hit is the new hit, at special position 1. Safe to ignore the + // return value because 1 < kNumSpecialHits. set_special_hit(1, hit); + // Safe to ignore the return value because sizeof(Hit) is a valid argument. set_start_byte_offset(sizeof(Hit)); } else { // No space for delta. We put the new hit at special position 0 - // and go to the full state. + // and go to the full state. Safe to ignore the return value because 1 < + // kNumSpecialHits. set_special_hit(0, hit); } return libtextclassifier3::Status::OK; @@ -178,13 +186,17 @@ libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull( void PostingListUsed::PrependHitToEmpty(const Hit &hit) { // First hit to be added. Just add verbatim, no compression. if (size_in_bytes_ == posting_list_utils::kSpecialHitsSize) { + // Safe to ignore the return value because 1 < kNumSpecialHits set_special_hit(1, hit); + // Safe to ignore the return value because sizeof(Hit) is a valid argument. set_start_byte_offset(sizeof(Hit)); } else { // Since this is the first hit, size != kSpecialHitsSize and // size % sizeof(Hit) == 0, we know that there is room to fit 'hit' into - // the compressed region. - uint32_t offset = PrependHitUncompressed(hit, size_in_bytes_); + // the compressed region, so ValueOrDie is safe. + uint32_t offset = PrependHitUncompressed(hit, size_in_bytes_).ValueOrDie(); + // Safe to ignore the return value because PrependHitUncompressed is + // guaranteed to return a valid offset. set_start_byte_offset(offset); } } @@ -226,12 +238,14 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull( memcpy(posting_list_buffer_ + offset, delta_buf, delta_len); // Prepend new hit with (possibly) its score. We know that there is room - // for 'hit' because of the if statement above. - offset = PrependHitUncompressed(hit, offset); - // offset is guaranteed to be valid here. The if above will guarantee that - // offset >= kSpecialHitSize and < size_in_bytes_ because the if ensures - // that there is enough room between offset and kSpecialHitSize to fit the - // delta of the previous hit, any score and the uncompressed hit. + // for 'hit' because of the if statement above, so calling ValueOrDie is + // safe. + offset = PrependHitUncompressed(hit, offset).ValueOrDie(); + // offset is guaranteed to be valid here. So it's safe to ignore the return + // value. The if above will guarantee that offset >= kSpecialHitSize and < + // size_in_bytes_ because the if ensures that there is enough room between + // offset and kSpecialHitSize to fit the delta of the previous hit, any + // score and the uncompressed hit. set_start_byte_offset(offset); } else if (posting_list_utils::kSpecialHitsSize + delta_len <= offset) { // Only have space for delta. The new hit must be put in special @@ -241,13 +255,17 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull( offset -= delta_len; memcpy(posting_list_buffer_ + offset, delta_buf, delta_len); - // Prepend pad. + // Prepend pad. Safe to ignore the return value of PadToEnd because offset + // must be less than size_in_bytes_. Otherwise, this function already would + // have returned FAILED_PRECONDITION. PadToEnd(posting_list_utils::kSpecialHitsSize, offset); - // Put new hit in special position 1. + // Put new hit in special position 1. Safe to ignore return value because 1 + // < kNumSpecialHits. set_special_hit(1, hit); - // State almost_full. + // State almost_full. Safe to ignore the return value because sizeof(Hit) is + // a valid argument. set_start_byte_offset(sizeof(Hit)); } else { // Very rare case where delta is larger than sizeof(Hit::Value) @@ -256,10 +274,18 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull( // special position 0. Hit cur(cur_value); if (cur.has_score()) { - cur = Hit(cur_value, ReadScore(offset)); + // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes. + // Therefore, offset must be less than kSpecialHitSize + 5. Since posting + // list size must be divisible by sizeof(Hit) (5), it is guaranteed that + // offset < size_in_bytes, so it is safe to call ValueOrDie here. + cur = Hit(cur_value, ReadScore(offset).ValueOrDie()); offset += sizeof(Hit::Score); } + // Safe to ignore the return value of PadToEnd because offset must be less + // than size_in_bytes_. Otherwise, this function already would have returned + // FAILED_PRECONDITION. PadToEnd(posting_list_utils::kSpecialHitsSize, offset); + // Safe to ignore the return value here because 0 and 1 < kNumSpecialHits. set_special_hit(1, cur); set_special_hit(0, hit); } @@ -292,18 +318,20 @@ libtextclassifier3::Status PostingListUsed::PrependHit(const Hit &hit) { } } -std::vector<Hit> PostingListUsed::GetHits() const { +libtextclassifier3::StatusOr<std::vector<Hit>> PostingListUsed::GetHits() + const { std::vector<Hit> hits_out; - GetHits(&hits_out); + ICING_RETURN_IF_ERROR(GetHits(&hits_out)); return hits_out; } -void PostingListUsed::GetHits(std::vector<Hit> *hits_out) const { - GetHitsInternal(/*limit=*/std::numeric_limits<uint32_t>::max(), /*pop=*/false, - hits_out); +libtextclassifier3::Status PostingListUsed::GetHits( + std::vector<Hit> *hits_out) const { + return GetHitsInternal(/*limit=*/std::numeric_limits<uint32_t>::max(), + /*pop=*/false, hits_out); } -void PostingListUsed::PopFrontHits(uint32_t num_hits) { +libtextclassifier3::Status PostingListUsed::PopFrontHits(uint32_t num_hits) { if (num_hits == 1 && full()) { // The PL is in full status which means that we save 2 uncompressed hits in // the 2 special postions. But full status may be reached by 2 different @@ -358,7 +386,7 @@ void PostingListUsed::PopFrontHits(uint32_t num_hits) { // Popping 2 hits should never fail because we've just ensured that the // posting list is in the FULL state. - GetHitsInternal(/*limit=*/2, /*pop=*/true, &out); + ICING_RETURN_IF_ERROR(GetHitsInternal(/*limit=*/2, /*pop=*/true, &out)); // PrependHit should never fail because out[1] is a valid hit less than // previous hits in the posting list and because there's no way that the @@ -366,12 +394,13 @@ void PostingListUsed::PopFrontHits(uint32_t num_hits) { // AND another hit. PrependHit(out[1]); } else if (num_hits > 0) { - GetHitsInternal(/*limit=*/num_hits, /*pop=*/true, nullptr); + return GetHitsInternal(/*limit=*/num_hits, /*pop=*/true, nullptr); } + return libtextclassifier3::Status::OK; } -uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop, - std::vector<Hit> *out) const { +libtextclassifier3::Status PostingListUsed::GetHitsInternal( + uint32_t limit, bool pop, std::vector<Hit> *out) const { // Put current uncompressed val here. Hit::Value val = Hit::kInvalidValue; uint32_t offset = get_start_byte_offset(); @@ -379,7 +408,9 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop, // First traverse the first two special positions. while (count < limit && offset < posting_list_utils::kSpecialHitsSize) { - Hit hit = get_special_hit(offset / sizeof(Hit)); + // Calling ValueOrDie is safe here because offset / sizeof(Hit) < + // kNumSpecialHits because of the check above. + Hit hit = get_special_hit(offset / sizeof(Hit)).ValueOrDie(); val = hit.value(); if (out != nullptr) { out->push_back(hit); @@ -407,7 +438,16 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop, } Hit hit(val); if (hit.has_score()) { - hit = Hit(val, ReadScore(offset)); + auto score_or = ReadScore(offset); + if (!score_or.ok()) { + // This posting list has been corrupted somehow. The first hit of the + // posting list claims to have a score, but there's no more room in the + // posting list for that score to exist. Return an empty vector and zero + // to indicate no hits retrieved. + out->clear(); + return absl_ports::InternalError("Posting list has been corrupted!"); + } + hit = Hit(val, score_or.ValueOrDie()); offset += sizeof(Hit::Score); } if (out != nullptr) { @@ -435,18 +475,33 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop, offset -= sizeof(Hit::Value); memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value)); } else { - // val won't fit in compressed area. Also see if there is a - // score. + // val won't fit in compressed area. Also see if there is a score. Hit hit(val); if (hit.has_score()) { - hit = Hit(val, ReadScore(offset)); + auto score_or = ReadScore(offset); + if (!score_or.ok()) { + // This posting list has been corrupted somehow. The first hit of + // the posting list claims to have a score, but there's no more room + // in the posting list for that score to exist. Return an empty + // vector and zero to indicate no hits retrieved. Do not pop + // anything. + out->clear(); + return absl_ports::InternalError( + "Posting list has been corrupted!"); + } + hit = Hit(val, score_or.ValueOrDie()); } + // Okay to ignore the return value here because 1 < kNumSpecialHits. mutable_this->set_special_hit(1, hit); + + // Prepend pad. Safe to ignore the return value of PadToEnd because + // offset must be less than size_in_bytes_ thanks to the if above. mutable_this->PadToEnd(posting_list_utils::kSpecialHitsSize, offset); offset = sizeof(Hit); } } - // offset is guaranteed to be valid. It falls into one of four scenarios: + // offset is guaranteed to be valid so ignoring the return value of + // set_start_byte_offset is safe. It falls into one of four scenarios: // Scenario 1: the above if was false because offset is not < size_in_bytes_ // In this case, offset must be == size_in_bytes_ because we reached // offset by unwinding hits on the posting list. @@ -466,26 +521,28 @@ uint32_t PostingListUsed::GetHitsInternal(uint32_t limit, bool pop, mutable_this->set_start_byte_offset(offset); } - return count; + return libtextclassifier3::Status::OK; } -Hit PostingListUsed::get_special_hit(uint32_t index) const { +libtextclassifier3::StatusOr<Hit> PostingListUsed::get_special_hit( + uint32_t index) const { static_assert(sizeof(Hit::Value) >= sizeof(uint32_t), "HitTooSmall"); - if (index >= posting_list_utils::kSpecialHitsSize / sizeof(Hit)) { - ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1"; - return Hit(); + if (index >= posting_list_utils::kNumSpecialHits || index < 0) { + return absl_ports::InvalidArgumentError( + "Special hits only exist at indices 0 and 1"); } Hit val; memcpy(&val, posting_list_buffer_ + index * sizeof(val), sizeof(val)); return val; } -void PostingListUsed::set_special_hit(uint32_t index, const Hit &val) { - if (index >= posting_list_utils::kSpecialHitsSize / sizeof(Hit)) { +bool PostingListUsed::set_special_hit(uint32_t index, const Hit &val) { + if (index >= posting_list_utils::kNumSpecialHits || index < 0) { ICING_LOG(ERROR) << "Special hits only exist at indices 0 and 1"; - return; + return false; } memcpy(posting_list_buffer_ + index * sizeof(val), &val, sizeof(val)); + return true; } uint32_t PostingListUsed::BytesUsed() const { @@ -515,17 +572,20 @@ uint32_t PostingListUsed::MinPostingListSizeToFit() const { bool PostingListUsed::IsPostingListValid() const { if (almost_full()) { - // Special Hit 1 should hold a Hit. - if (!get_special_hit(1).is_valid()) { + // Special Hit 1 should hold a Hit. Calling ValueOrDie is safe because we + // know that 1 < kNumSpecialHits. + if (!get_special_hit(1).ValueOrDie().is_valid()) { ICING_LOG(ERROR) << "Both special hits cannot be invalid at the same time."; return false; } } else if (!full()) { - // NOT_FULL. Special Hit 0 should hold a valid offset. - if (get_special_hit(0).value() > size_in_bytes_ || - get_special_hit(0).value() < posting_list_utils::kSpecialHitsSize) { - ICING_LOG(ERROR) << "Hit: " << get_special_hit(0).value() + // NOT_FULL. Special Hit 0 should hold a valid offset. Calling ValueOrDie is + // safe because we know that 0 < kNumSpecialHits. + if (get_special_hit(0).ValueOrDie().value() > size_in_bytes_ || + get_special_hit(0).ValueOrDie().value() < + posting_list_utils::kSpecialHitsSize) { + ICING_LOG(ERROR) << "Hit: " << get_special_hit(0).ValueOrDie().value() << " size: " << size_in_bytes_ << " sp size: " << posting_list_utils::kSpecialHitsSize; return false; @@ -540,55 +600,57 @@ uint32_t PostingListUsed::get_start_byte_offset() const { } else if (almost_full()) { return sizeof(Hit); } else { - // NOT_FULL - return get_special_hit(0).value(); + // NOT_FULL, calling ValueOrDie is safe because we know that 0 < + // kNumSpecialHits. + return get_special_hit(0).ValueOrDie().value(); } } -void PostingListUsed::set_start_byte_offset(uint32_t offset) { +bool PostingListUsed::set_start_byte_offset(uint32_t offset) { if (offset > size_in_bytes_) { ICING_LOG(ERROR) << "offset cannot be a value greater than size " << size_in_bytes_ << ". offset is " << offset << "."; - return; + return false; } if (offset < posting_list_utils::kSpecialHitsSize && offset > sizeof(Hit)) { ICING_LOG(ERROR) << "offset cannot be a value between (" << sizeof(Hit) << ", " << posting_list_utils::kSpecialHitsSize << "). offset is " << offset << "."; - return; + return false; } if (offset < sizeof(Hit) && offset != 0) { ICING_LOG(ERROR) << "offset cannot be a value between (0, " << sizeof(Hit) << "). offset is " << offset << "."; - return; + return false; } if (offset >= posting_list_utils::kSpecialHitsSize) { - // not_full state. + // not_full state. Safe to ignore the return value because 0 and 1 are both + // < kNumSpecialHits. set_special_hit(0, Hit(offset)); set_special_hit(1, Hit()); } else if (offset == sizeof(Hit)) { - // almost_full state. + // almost_full state. Safe to ignore the return value because 1 is both < + // kNumSpecialHits. set_special_hit(0, Hit()); } // Nothing to do for the FULL state - the offset isn't actually stored // anywhere and both special hits hold valid hits. + return true; } -uint32_t PostingListUsed::PrependHitUncompressed(const Hit &hit, - uint32_t offset) { +libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed( + const Hit &hit, uint32_t offset) { if (hit.has_score()) { if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit)) { - ICING_LOG(ERROR) << "Not enough room to prepend Hit at offset " << offset - << "."; - return offset; + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Not enough room to prepend Hit at offset %d.", offset)); } offset -= sizeof(Hit); memcpy(posting_list_buffer_ + offset, &hit, sizeof(Hit)); } else { if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value)) { - ICING_LOG(ERROR) << "Not enough room to prepend Hit::Value at offset " - << offset << "."; - return offset; + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "Not enough room to prepend Hit::Value at offset %d.", offset)); } offset -= sizeof(Hit::Value); Hit::Value val = hit.value(); @@ -597,12 +659,12 @@ uint32_t PostingListUsed::PrependHitUncompressed(const Hit &hit, return offset; } -Hit::Score PostingListUsed::ReadScore(uint32_t offset) const { +libtextclassifier3::StatusOr<Hit::Score> PostingListUsed::ReadScore( + uint32_t offset) const { if (offset + sizeof(Hit::Score) > size_in_bytes_) { - ICING_LOG(FATAL) - << "offset " << offset - << " must not point past the end of the posting list of size " - << size_in_bytes_ << "."; + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "offset %d must not point past the end of the posting list of size %d.", + offset, size_in_bytes_)); } Hit::Score score; memcpy(&score, posting_list_buffer_ + offset, sizeof(Hit::Score)); diff --git a/icing/index/posting-list-used.h b/icing/index/main/posting-list-used.h index 492435b..8bc9c8d 100644 --- a/icing/index/posting-list-used.h +++ b/icing/index/main/posting-list-used.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_INDEX_POSTING_LIST_USED_H_ -#define ICING_INDEX_POSTING_LIST_USED_H_ +#ifndef ICING_INDEX_MAIN_POSTING_LIST_USED_H_ +#define ICING_INDEX_MAIN_POSTING_LIST_USED_H_ #include <string.h> #include <sys/mman.h> @@ -24,7 +24,7 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/hit/hit.h" -#include "icing/index/posting-list-utils.h" +#include "icing/index/main/posting-list-utils.h" #include "icing/util/logging.h" namespace icing { @@ -97,15 +97,27 @@ class PostingListUsed { uint32_t PrependHitArray(const T *array, uint32_t num_hits, bool keep_prepended); - // Return hits sorted by the reverse order of prepending. - std::vector<Hit> GetHits() const; + // Retrieves the hits stored in the posting list. + // + // RETURNS: + // - On success, a vector of hits sorted by the reverse order of prepending. + // - INTERNAL_ERROR if the posting list has been corrupted somehow. + libtextclassifier3::StatusOr<std::vector<Hit>> GetHits() const; // Same as GetHits but appends hits to hits_out. - void GetHits(std::vector<Hit> *hits_out) const; + // + // RETURNS: + // - On success, a vector of hits sorted by the reverse order of prepending. + // - INTERNAL_ERROR if the posting list has been corrupted somehow. + libtextclassifier3::Status GetHits(std::vector<Hit> *hits_out) const; // Undo the last num_hits hits prepended. If num_hits > number of // hits we clear all hits. - void PopFrontHits(uint32_t num_hits); + // + // RETURNS: + // - OK on success + // - INTERNAL_ERROR if the posting list has been corrupted somehow. + libtextclassifier3::Status PopFrontHits(uint32_t num_hits); // Returns bytes used by actual hits. uint32_t BytesUsed() const; @@ -194,12 +206,15 @@ class PostingListUsed { // Helpers to determine what state the posting list is in. bool full() const { - return get_special_hit(0).is_valid() && get_special_hit(1).is_valid(); + return get_special_hit(0).ValueOrDie().is_valid() && + get_special_hit(1).ValueOrDie().is_valid(); + } + bool almost_full() const { + return !get_special_hit(0).ValueOrDie().is_valid(); } - bool almost_full() const { return !get_special_hit(0).is_valid(); } bool empty() const { - return get_special_hit(0).value() == size_in_bytes_ && - !get_special_hit(1).is_valid(); + return get_special_hit(0).ValueOrDie().value() == size_in_bytes_ && + !get_special_hit(1).ValueOrDie().is_valid(); } // Returns false if both special hits are invalid or if the offset value @@ -236,10 +251,10 @@ class PostingListUsed { // Sets the special hits to properly reflect what offset is (see layout // comment for further details). - // If offset > size_in_bytes_ or offset is (kSpecialHitsSize, sizeof(Hit)) or - // offset is (sizeof(Hit), 0), then offset is considered invalid and this - // function has no effect. - void set_start_byte_offset(uint32_t offset); + // + // Returns false if offset > size_in_bytes_ or offset is (kSpecialHitsSize, + // sizeof(Hit)) or offset is (sizeof(Hit), 0). True, otherwise. + bool set_start_byte_offset(uint32_t offset); // Manipulate padded areas. We never store the same hit value twice // so a delta of 0 is a pad byte. @@ -247,42 +262,53 @@ class PostingListUsed { // Returns offset of first non-pad byte. uint32_t GetPadEnd(uint32_t offset) const; - // Fill padding between offset start and offset end with 0s. If end > - // size_in_bytes_, this function has no effect. - void PadToEnd(uint32_t start, uint32_t end); + // Fill padding between offset start and offset end with 0s. + // Returns false if end > size_in_bytes_. True, otherwise. + bool PadToEnd(uint32_t start, uint32_t end); - // Helper for AppendHits/PopFrontHits. Returns number actually traversed (also - // the size of out if non-NULL), which will always be equal to 'limit' unless - // there are fewer than 'limit' hits in the posting list. out can be NULL. + // Helper for AppendHits/PopFrontHits. Adds limit number of hits to out or all + // hits in the posting list if the posting list contains less than limit + // number of hits. out can be NULL. // // NOTE: If called with limit=1, pop=true on a posting list that transitioned // from NOT_FULL directly to FULL, GetHitsInternal will not return the posting // list to NOT_FULL. Instead it will leave it in a valid state, but it will be // ALMOST_FULL. - uint32_t GetHitsInternal(uint32_t limit, bool pop, - std::vector<Hit> *out) const; + // + // RETURNS: + // - OK on success + // - INTERNAL_ERROR if the posting list has been corrupted somehow. + libtextclassifier3::Status GetHitsInternal(uint32_t limit, bool pop, + std::vector<Hit> *out) const; - // Retrieves the value stored in the index-th special hit. If index is not - // less than kSpecialHitSize / sizeof(Hit), returns Hit::kInvalidValue. - Hit get_special_hit(uint32_t index) const; + // Retrieves the value stored in the index-th special hit. + // + // RETURNS: + // - A valid Hit, on success + // - INVALID_ARGUMENT if index is not less than kNumSpecialHits + libtextclassifier3::StatusOr<Hit> get_special_hit(uint32_t index) const; // Sets the value stored in the index-th special hit to val. If index is not // less than kSpecialHitSize / sizeof(Hit), this has no effect. - void set_special_hit(uint32_t index, const Hit &val); + bool set_special_hit(uint32_t index, const Hit &val); // Prepends hit to the memory region [offset - sizeof(Hit), offset] and // returns the new beginning of the padded region. // - // If offset - kSpecialHitSize < sizeof(Hit/Hit::Value), then this function - // has no effect. - uint32_t PrependHitUncompressed(const Hit &hit, uint32_t offset); + // RETURNS: + // - The new beginning of the padded region, if successful. + // - INVALID_ARGUMENT if hit will not fit (uncompressed) between offset and + // kSpecialHitsSize + libtextclassifier3::StatusOr<uint32_t> PrependHitUncompressed( + const Hit &hit, uint32_t offset); // Reads the score located at offset and returns it. Callers are responsible // for ensuring that the bytes starting at offset actually represent a score. // - // REQUIRES: offset + sizeof(Hit::Score) < size_in_bytes_ - // REQUIRES enforced by CHECK - Hit::Score ReadScore(uint32_t offset) const; + // RETURNS: + // - The score located at offset, if successful + // - INVALID_ARGUMENT if offset + sizeof(Hit::Score) >= size_in_bytes_ + libtextclassifier3::StatusOr<Hit::Score> ReadScore(uint32_t offset) const; // A byte array of size size_in_bytes_ containing encoded hits for this // posting list. @@ -318,4 +344,4 @@ uint32_t PostingListUsed::PrependHitArray(const T *array, uint32_t num_hits, } // namespace lib } // namespace icing -#endif // ICING_INDEX_POSTING_LIST_USED_H_ +#endif // ICING_INDEX_MAIN_POSTING_LIST_USED_H_ diff --git a/icing/index/posting-list-used_test.cc b/icing/index/main/posting-list-used_test.cc index a0e9514..eb62aeb 100644 --- a/icing/index/posting-list-used_test.cc +++ b/icing/index/main/posting-list-used_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/index/posting-list-used.h" +#include "icing/index/main/posting-list-used.h" #include <fcntl.h> #include <sys/stat.h> @@ -32,18 +32,21 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "icing/index/posting-list-utils.h" +#include "icing/index/main/posting-list-utils.h" #include "icing/legacy/index/icing-bit-util.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/hit-test-utils.h" -using std::min; using std::reverse; using std::vector; using testing::ElementsAre; using testing::ElementsAreArray; +using testing::Eq; using testing::IsEmpty; +using testing::Le; +using testing::Lt; namespace icing { namespace lib { @@ -59,41 +62,6 @@ struct HitElt { Hit hit; }; -// Produces a vector with num_hits HitElts. When delta encoded each hit should -// be 1 byte with a 1 byte Hit::Score. -std::vector<HitElt> CreateHits(DocumentId start_docid, int num_hits) { - std::vector<HitElt> hits; - hits.reserve(num_hits); - while (num_hits--) { - Hit::Score score = (start_docid % 7) + 1; - SectionId section_id = (start_docid + 2) % (kMaxSectionId + 1); - hits.emplace_back(Hit(section_id, start_docid, score)); - ++start_docid; - } - std::reverse(hits.begin(), hits.end()); - return hits; -} - -Hit CreateHit(Hit last_hit, int desired_byte_length) { - Hit hit = - (last_hit.section_id() == kMinSectionId) - ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score()) - : Hit(last_hit.section_id() - 1, last_hit.document_id(), - last_hit.score()); - uint8_t buf[5]; - while (VarInt::Encode(last_hit.value() - hit.value(), buf) < - desired_byte_length) { - hit = (hit.section_id() == kMinSectionId) - ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score()) - : Hit(hit.section_id() - 1, hit.document_id(), hit.score()); - } - return hit; -} - -DocumentId InvertDocumentId(DocumentId document_id) { - return kMaxDocumentId - document_id; -} - TEST(PostingListTest, PostingListUsedPrependHitNotFull) { static const int kNumHits = 2551; static const size_t kHitsSize = kNumHits * sizeof(Hit); @@ -109,16 +77,16 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) { pl_used.PrependHit(hit0); // Size = sizeof(uncompressed hit0) int expected_size = sizeof(Hit); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0))); Hit hit1(/*section_id=*/0, 1, Hit::kMaxHitScore); pl_used.PrependHit(hit1); // Size = sizeof(uncompressed hit1) // + sizeof(hit0-hit1) + sizeof(hit0::score) expected_size += 2 + sizeof(Hit::Score); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0))); Hit hit2(/*section_id=*/0, 2, /*score=*/56); pl_used.PrependHit(hit2); @@ -126,8 +94,8 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) { // + sizeof(hit1-hit2) // + sizeof(hit0-hit1) + sizeof(hit0::score) expected_size += 2; - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit2, hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0))); Hit hit3(/*section_id=*/0, 3, Hit::kMaxHitScore); pl_used.PrependHit(hit3); @@ -136,8 +104,9 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) { // + sizeof(hit1-hit2) // + sizeof(hit0-hit1) + sizeof(hit0::score) expected_size += 2 + sizeof(Hit::Score); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit3, hit2, hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), + IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0))); } TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) { @@ -161,8 +130,8 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) { ICING_EXPECT_OK(pl_used.PrependHit(hit2)); // Size used will be 2+2+4=8 bytes int expected_size = sizeof(Hit::Value) + 2 + 2; - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit2, hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0))); // Add one more hit to transition NOT_FULL -> ALMOST_FULL Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3); @@ -175,8 +144,9 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) { // Because we're in ALMOST_FULL, the expected size is the size of the pl minus // the one hit used to mark the posting list as ALMOST_FULL. expected_size = kHitsSize - sizeof(Hit); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit3, hit2, hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), + IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0))); // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2); @@ -185,8 +155,9 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) { // a 2-byte delta. That delta will fit in the compressed region (which will // now have 9 bytes in use), hit4 will be placed in one of the special hits // and the posting list will remain in ALMOST_FULL. - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit4, hit3, hit2, hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), + IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0))); // Add one more hit to transition ALMOST_FULL -> FULL Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2); @@ -195,9 +166,9 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) { // a 2-byte delta which will not fit in the compressed region. So hit4 will // remain in one of the special hits and hit5 will occupy the other, making // the posting list FULL. - EXPECT_LE(pl_used.BytesUsed(), kHitsSize); + EXPECT_THAT(pl_used.BytesUsed(), Le(kHitsSize)); EXPECT_THAT(pl_used.GetHits(), - ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)); + IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0))); // The posting list is FULL. Adding another hit should fail. Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1); @@ -214,8 +185,8 @@ TEST(PostingListTest, PostingListUsedMinSize) { static_cast<void *>(hits_buf.get()), posting_list_utils::min_posting_list_size())); // PL State: EMPTY - EXPECT_LE(pl_used.BytesUsed(), 0); - EXPECT_THAT(pl_used.GetHits(), IsEmpty()); + EXPECT_THAT(pl_used.BytesUsed(), Eq(0)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty())); // Add a hit, PL should shift to ALMOST_FULL state Hit hit0(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false, @@ -223,8 +194,8 @@ TEST(PostingListTest, PostingListUsedMinSize) { ICING_EXPECT_OK(pl_used.PrependHit(hit0)); // Size = sizeof(uncompressed hit0) int expected_size = sizeof(Hit); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0))); // Add the smallest hit possible - no score and a delta of 1. PL should shift // to FULL state. @@ -233,16 +204,16 @@ TEST(PostingListTest, PostingListUsedMinSize) { ICING_EXPECT_OK(pl_used.PrependHit(hit1)); // Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0) expected_size += sizeof(Hit); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0))); // Try to add the smallest hit possible. Should fail Hit hit2(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); EXPECT_THAT(pl_used.PrependHit(hit2), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); - EXPECT_LE(pl_used.BytesUsed(), expected_size); - EXPECT_THAT(pl_used.GetHits(), ElementsAre(hit1, hit0)); + EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0))); } TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) { @@ -271,7 +242,7 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) { // only fit two hits. So PrependHitArray should fail. uint32_t num_can_prepend = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( &hits_in[0], hits_in.size(), false); - EXPECT_EQ(num_can_prepend, 2); + EXPECT_THAT(num_can_prepend, Eq(2)); int can_fit_hits = num_can_prepend; // The PL has room for 2 hits. We should be able to add them without any @@ -279,13 +250,13 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) { const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2); num_can_prepend = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( hits_in_ptr, can_fit_hits, false); - EXPECT_EQ(num_can_prepend, can_fit_hits); - EXPECT_EQ(size, pl_used.BytesUsed()); + EXPECT_THAT(num_can_prepend, Eq(can_fit_hits)); + EXPECT_THAT(size, Eq(pl_used.BytesUsed())); std::deque<Hit> hits_pushed; std::transform(hits_in.rbegin(), hits_in.rend() - hits_in.size() + can_fit_hits, std::front_inserter(hits_pushed), HitElt::get_hit); - EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed))); } TEST(PostingListTest, PostingListPrependHitArrayPostingList) { @@ -325,12 +296,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { // five hits without issue, transitioning the PL from EMPTY -> NOT_FULL. uint32_t num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( &hits_in[0], hits_in.size(), false); - EXPECT_EQ(num_could_fit, hits_in.size()); - EXPECT_EQ(byte_size, pl_used.BytesUsed()); + EXPECT_THAT(num_could_fit, Eq(hits_in.size())); + EXPECT_THAT(byte_size, Eq(pl_used.BytesUsed())); std::deque<Hit> hits_pushed; std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); - EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed))); Hit first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1); hits_in.clear(); @@ -369,12 +340,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { // remain in the NOT_FULL state. num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( &hits_in[0], hits_in.size(), false); - EXPECT_EQ(num_could_fit, hits_in.size()); - EXPECT_EQ(byte_size, pl_used.BytesUsed()); + EXPECT_THAT(num_could_fit, Eq(hits_in.size())); + EXPECT_THAT(byte_size, Eq(pl_used.BytesUsed())); // All hits from hits_in were added. std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); - EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed))); first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/3); hits_in.clear(); @@ -402,12 +373,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { // unused space. num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( &hits_in[0], hits_in.size(), false); - EXPECT_EQ(num_could_fit, hits_in.size()); - EXPECT_EQ(byte_size, pl_used.BytesUsed()); + EXPECT_THAT(num_could_fit, Eq(hits_in.size())); + EXPECT_THAT(byte_size, Eq(pl_used.BytesUsed())); // All hits from hits_in were added. std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); - EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed))); first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1); hits_in.clear(); @@ -441,12 +412,12 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { // (1 byte). num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( &hits_in[0], hits_in.size(), false); - EXPECT_EQ(num_could_fit, hits_in.size()); - EXPECT_EQ(size, pl_used.BytesUsed()); + EXPECT_THAT(num_could_fit, Eq(hits_in.size())); + EXPECT_THAT(size, Eq(pl_used.BytesUsed())); // All hits from hits_in were added. std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); - EXPECT_THAT(pl_used.GetHits(), ElementsAreArray(hits_pushed)); + EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAreArray(hits_pushed))); } TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) { @@ -459,30 +430,35 @@ TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) { std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize); // Create an array with one too many hits - vector<HitElt> hits_in_too_many = CreateHits(0, kNumHits + 1); + vector<Hit> hits_in_too_many = + CreateHits(kNumHits + 1, /*desired_byte_length=*/1); + vector<HitElt> hit_elts_in_too_many; + for (const Hit &hit : hits_in_too_many) { + hit_elts_in_too_many.emplace_back(hit); + } ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion( static_cast<void *>(hits_buf.get()), posting_list_utils::min_posting_list_size())); - // PrependHitArray should fail because hits_in_too_many is far too large for - // the minimum size pl. + // PrependHitArray should fail because hit_elts_in_too_many is far too large + // for the minimum size pl. uint32_t num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( - &hits_in_too_many[0], hits_in_too_many.size(), false); - ASSERT_LT(num_could_fit, hits_in_too_many.size()); - ASSERT_EQ(pl_used.BytesUsed(), 0); - ASSERT_THAT(pl_used.GetHits(), testing::IsEmpty()); + &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), false); + ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size())); + ASSERT_THAT(pl_used.BytesUsed(), Eq(0)); + ASSERT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty())); ICING_ASSERT_OK_AND_ASSIGN( pl_used, PostingListUsed::CreateFromUnitializedRegion( static_cast<void *>(hits_buf.get()), kHitsSize)); - // PrependHitArray should fail because hits_in_too_many is one hit too large - // for this pl. + // PrependHitArray should fail because hit_elts_in_too_many is one hit too + // large for this pl. num_could_fit = pl_used.PrependHitArray<HitElt, HitElt::get_hit>( - &hits_in_too_many[0], hits_in_too_many.size(), false); - ASSERT_LT(num_could_fit, hits_in_too_many.size()); - ASSERT_EQ(pl_used.BytesUsed(), 0); - ASSERT_THAT(pl_used.GetHits(), testing::IsEmpty()); + &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), false); + ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size())); + ASSERT_THAT(pl_used.BytesUsed(), Eq(0)); + ASSERT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty())); } TEST(PostingListTest, PostingListStatusJumpFromNotFullToFullAndBack) { @@ -494,13 +470,13 @@ TEST(PostingListTest, PostingListStatusJumpFromNotFullToFullAndBack) { ICING_ASSERT_OK(pl.PrependHit(Hit(Hit::kInvalidValue - 1, 0))); uint32_t bytes_used = pl.BytesUsed(); // Status not full. - CHECK_LE(bytes_used, pl_size - posting_list_utils::kSpecialHitsSize); + ASSERT_THAT(bytes_used, Le(pl_size - posting_list_utils::kSpecialHitsSize)); ICING_ASSERT_OK(pl.PrependHit(Hit(Hit::kInvalidValue >> 2, 0))); // Status should jump to full directly. - CHECK_EQ(pl.BytesUsed(), pl_size); + ASSERT_THAT(pl.BytesUsed(), Eq(pl_size)); pl.PopFrontHits(1); // Status should return to not full as before. - CHECK_EQ(pl.BytesUsed(), bytes_used); + ASSERT_THAT(pl.BytesUsed(), Eq(bytes_used)); } TEST(PostingListTest, DeltaOverflow) { @@ -533,5 +509,150 @@ TEST(PostingListTest, DeltaOverflow) { StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } +TEST(PostingListTest, MoveFrom) { + int size = 3 * posting_list_utils::min_posting_list_size(); + std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf1.get()), size)); + std::vector<Hit> hits1 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); + for (const Hit &hit : hits1) { + ICING_ASSERT_OK(pl_used1.PrependHit(hit)); + } + + std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf2.get()), size)); + std::vector<Hit> hits2 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2); + for (const Hit &hit : hits2) { + ICING_ASSERT_OK(pl_used2.PrependHit(hit)); + } + + ICING_ASSERT_OK(pl_used2.MoveFrom(&pl_used1)); + EXPECT_THAT(pl_used2.GetHits(), + IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend()))); + EXPECT_THAT(pl_used1.GetHits(), IsOkAndHolds(IsEmpty())); +} + +TEST(PostingListTest, MoveFromNullArgumentReturnsInvalidArgument) { + int size = 3 * posting_list_utils::min_posting_list_size(); + std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf1.get()), size)); + std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); + for (const Hit &hit : hits) { + ICING_ASSERT_OK(pl_used1.PrependHit(hit)); + } + + EXPECT_THAT(pl_used1.MoveFrom(/*other=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(pl_used1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits.rbegin(), hits.rend()))); +} + +TEST(PostingListTest, MoveFromInvalidPostingListReturnsInvalidArgument) { + int size = 3 * posting_list_utils::min_posting_list_size(); + std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf1.get()), size)); + std::vector<Hit> hits1 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); + for (const Hit &hit : hits1) { + ICING_ASSERT_OK(pl_used1.PrependHit(hit)); + } + + std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf2.get()), size)); + std::vector<Hit> hits2 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2); + for (const Hit &hit : hits2) { + ICING_ASSERT_OK(pl_used2.PrependHit(hit)); + } + + // Write invalid hits to the beginning of pl_used1 to make it invalid. + Hit invalid_hit; + Hit *first_hit = reinterpret_cast<Hit *>(hits_buf1.get()); + *first_hit = invalid_hit; + ++first_hit; + *first_hit = invalid_hit; + EXPECT_THAT(pl_used2.MoveFrom(&pl_used1), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(pl_used2.GetHits(), + IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend()))); +} + +TEST(PostingListTest, MoveToInvalidPostingListReturnsInvalidArgument) { + int size = 3 * posting_list_utils::min_posting_list_size(); + std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf1.get()), size)); + std::vector<Hit> hits1 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); + for (const Hit &hit : hits1) { + ICING_ASSERT_OK(pl_used1.PrependHit(hit)); + } + + std::unique_ptr<char[]> hits_buf2 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf2.get()), size)); + std::vector<Hit> hits2 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2); + for (const Hit &hit : hits2) { + ICING_ASSERT_OK(pl_used2.PrependHit(hit)); + } + + // Write invalid hits to the beginning of pl_used2 to make it invalid. + Hit invalid_hit; + Hit *first_hit = reinterpret_cast<Hit *>(hits_buf2.get()); + *first_hit = invalid_hit; + ++first_hit; + *first_hit = invalid_hit; + EXPECT_THAT(pl_used2.MoveFrom(&pl_used1), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(pl_used1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend()))); +} + +TEST(PostingListTest, MoveToPostingListTooSmall) { + int size = 3 * posting_list_utils::min_posting_list_size(); + std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used1, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf1.get()), size)); + std::vector<Hit> hits1 = + CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); + for (const Hit &hit : hits1) { + ICING_ASSERT_OK(pl_used1.PrependHit(hit)); + } + + std::unique_ptr<char[]> hits_buf2 = + std::make_unique<char[]>(posting_list_utils::min_posting_list_size()); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used2, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf2.get()), + posting_list_utils::min_posting_list_size())); + std::vector<Hit> hits2 = + CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2); + for (const Hit &hit : hits2) { + ICING_ASSERT_OK(pl_used2.PrependHit(hit)); + } + + EXPECT_THAT(pl_used2.MoveFrom(&pl_used1), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(pl_used1.GetHits(), + IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend()))); + EXPECT_THAT(pl_used2.GetHits(), + IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend()))); +} + } // namespace lib } // namespace icing diff --git a/icing/index/posting-list-utils.cc b/icing/index/main/posting-list-utils.cc index b0e2929..b734767 100644 --- a/icing/index/posting-list-utils.cc +++ b/icing/index/main/posting-list-utils.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/index/posting-list-utils.h" +#include "icing/index/main/posting-list-utils.h" #include "icing/legacy/index/icing-bit-util.h" #include "icing/util/logging.h" diff --git a/icing/index/posting-list-utils.h b/icing/index/main/posting-list-utils.h index fc90d64..77537a7 100644 --- a/icing/index/posting-list-utils.h +++ b/icing/index/main/posting-list-utils.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_INDEX_POSTING_LIST_UTILS_H_ -#define ICING_INDEX_POSTING_LIST_UTILS_H_ +#ifndef ICING_INDEX_MAIN_POSTING_LIST_UTILS_H_ +#define ICING_INDEX_MAIN_POSTING_LIST_UTILS_H_ #include <cstdint> @@ -26,7 +26,8 @@ namespace posting_list_utils { // Represents the byte length of the two special hits described // in the private section of posting-list-used.h. -static constexpr uint32_t kSpecialHitsSize = sizeof(Hit) * 2; +inline constexpr uint32_t kNumSpecialHits = 2; +inline constexpr uint32_t kSpecialHitsSize = sizeof(Hit) * kNumSpecialHits; constexpr uint32_t min_posting_list_size() { return kSpecialHitsSize; } @@ -41,4 +42,4 @@ bool IsValidPostingListSize(uint32_t size_in_bytes); } // namespace lib } // namespace icing -#endif // ICING_INDEX_POSTING_LIST_UTILS_H_ +#endif // ICING_INDEX_MAIN_POSTING_LIST_UTILS_H_ diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index b1b5420..ac8d2eb 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -188,6 +188,18 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGet( } JNIEXPORT jbyteArray JNICALL +Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces( + JNIEnv* env, jclass clazz, jlong native_pointer) { + icing::lib::IcingSearchEngine* icing = + GetIcingSearchEnginePointer(native_pointer); + + icing::lib::GetAllNamespacesResultProto get_all_namespaces_result_proto = + icing->GetAllNamespaces(); + + return SerializeProtoToJniByteArray(env, get_all_namespaces_result_proto); +} + +JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeSearch( JNIEnv* env, jclass clazz, jlong native_pointer, jbyteArray search_spec_bytes, jbyteArray scoring_spec_bytes, diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h index 7136ef8..c33be96 100644 --- a/icing/legacy/index/icing-dynamic-trie.h +++ b/icing/legacy/index/icing-dynamic-trie.h @@ -569,6 +569,7 @@ class IcingDynamicTrie : public IIcingStorage { class CandidateSet; // For testing only. + friend class IcingDynamicTrieTest_TrieShouldRespectLimits_Test; friend class IcingDynamicTrieTest_SyncErrorRecovery_Test; friend class IcingDynamicTrieTest_BitmapsClosedWhenInitFails_Test; void GetHeader(IcingDynamicTrieHeader *hdr) const; diff --git a/icing/legacy/index/icing-dynamic-trie_test.cc b/icing/legacy/index/icing-dynamic-trie_test.cc new file mode 100644 index 0000000..4fae52a --- /dev/null +++ b/icing/legacy/index/icing-dynamic-trie_test.cc @@ -0,0 +1,921 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/legacy/index/icing-dynamic-trie.h" + +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include "icing/text_classifier/lib3/utils/hash/farmhash.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/legacy/index/icing-filesystem.h" +#include "icing/testing/tmp-directory.h" + +using testing::ElementsAre; + +namespace icing { +namespace lib { + +namespace { + +constexpr std::string_view kKeys[] = { + "", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg", +}; +constexpr uint32_t kNumKeys = ABSL_ARRAYSIZE(kKeys); + +class IcingDynamicTrieTest : public ::testing::Test { + protected: + class NewValueMap : public IcingDynamicTrie::NewValueMap { + public: + const void* GetNewValue(uint32_t old_value_index) const override { + const_cast<NewValueMap*>(this)->buf_ = old_value_index; + return &buf_; + } + + private: + uint32_t buf_; + }; + + // Output trie stats to stderr. + static void StatsDump(const IcingDynamicTrie& trie) { + IcingDynamicTrie::Stats stats; + trie.CollectStats(&stats); + DLOG(INFO) << "Stats:\n" << stats.DumpStats(true); + } + + static void AddToTrie(IcingDynamicTrie* trie, uint32_t num_keys) { + std::string key; + for (uint32_t i = 0; i < kNumKeys; i++) { + key.clear(); + IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i); + bool inserted = trie->Insert(key.c_str(), &i); + ASSERT_TRUE(inserted); + } + } + + static void CheckTrie(const IcingDynamicTrie& trie, uint32_t num_keys) { + std::string key; + for (uint32_t i = 0; i < kNumKeys; i++) { + key.clear(); + IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i); + uint32_t val; + bool found = trie.Find(key.c_str(), &val); + EXPECT_TRUE(found); + EXPECT_EQ(i, val); + } + } + + static void PrintTrie(const IcingDynamicTrie& trie) { + std::vector<std::string> keys; + std::ostringstream os; + DLOG(INFO) << "Trie:\n"; + trie.DumpTrie(&os, &keys); + DLOG(INFO) << os.str(); + } + + void SetUp() override { + trie_files_dir_ = GetTestTempDir() + "/trie_files"; + trie_files_prefix_ = trie_files_dir_ + "/test_file_"; + } + + void TearDown() override { + IcingFilesystem filesystem; + filesystem.DeleteDirectoryRecursively(trie_files_dir_.c_str()); + } + + std::string trie_files_dir_; + std::string trie_files_prefix_; +}; + +constexpr std::string_view kCommonEnglishWords[] = { + "that", "was", "for", "on", "are", "with", "they", "be", "at", + "one", "have", "this", "from", "word", "but", "what", "some", "you", + "had", "the", "and", "can", "out", "other", "were", "which", "their", + "time", "will", "how", "said", "each", "tell", "may", "three"}; +constexpr uint32_t kCommonEnglishWordArrayLen = + sizeof(kCommonEnglishWords) / sizeof(std::string_view); + +TEST_F(IcingDynamicTrieTest, Simple) { + // Test simple key insertions. + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + for (uint32_t i = 0; i < kNumKeys; i++) { + ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i)); + + uint32_t val; + bool found = trie.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val; + } + + EXPECT_EQ(trie.size(), kNumKeys); + + StatsDump(trie); + std::vector<std::string> keys; + std::ostringstream os; + DLOG(INFO) << "Trie:\n"; + trie.DumpTrie(&os, &keys); + DLOG(INFO) << os.str(); + EXPECT_EQ(keys.size(), kNumKeys); +} + +TEST_F(IcingDynamicTrieTest, Init) { + // Test create/init behavior. + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + EXPECT_FALSE(trie.is_initialized()); + EXPECT_FALSE(trie.Init()); + + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + EXPECT_TRUE(trie.Init()); + EXPECT_TRUE(trie.is_initialized()); +} + +TEST_F(IcingDynamicTrieTest, Iterator) { + // Test iterator. + IcingFilesystem filesystem; + uint32_t val; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + for (uint32_t i = 0; i < kNumKeys; i++) { + ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i)); + } + + // We try everything twice to test that Reset also works. + + // Should get the entire trie. + IcingDynamicTrie::Iterator it_all(trie, ""); + for (int i = 0; i < 2; i++) { + uint32_t count = 0; + for (; it_all.IsValid(); it_all.Advance()) { + uint32_t val_idx = it_all.GetValueIndex(); + EXPECT_EQ(it_all.GetValue(), trie.GetValueAtIndex(val_idx)); + count++; + } + EXPECT_EQ(count, kNumKeys); + it_all.Reset(); + } + + // Get everything under "a". + IcingDynamicTrie::Iterator it1(trie, "a"); + for (int i = 0; i < 2; i++) { + ASSERT_TRUE(it1.IsValid()); + EXPECT_STREQ(it1.GetKey(), "ab"); + static const uint32_t kOne = 1; + ASSERT_TRUE(it1.GetValue() != nullptr); + EXPECT_TRUE(!memcmp(it1.GetValue(), &kOne, sizeof(kOne))); + + ASSERT_TRUE(it1.Advance()); + ASSERT_TRUE(it1.IsValid()); + EXPECT_STREQ(it1.GetKey(), "abbb"); + + ASSERT_TRUE(it1.Advance()); + ASSERT_TRUE(it1.IsValid()); + EXPECT_STREQ(it1.GetKey(), "abcdefg"); + + ASSERT_TRUE(it1.Advance()); + ASSERT_TRUE(it1.IsValid()); + EXPECT_STREQ(it1.GetKey(), "abd"); + + ASSERT_TRUE(it1.Advance()); + ASSERT_TRUE(it1.IsValid()); + EXPECT_STREQ(it1.GetKey(), "ac"); + + EXPECT_FALSE(it1.Advance()); + EXPECT_FALSE(it1.IsValid()); + + it1.Reset(); + } + + // Now "b". + IcingDynamicTrie::Iterator it2(trie, "b"); + for (int i = 0; i < 2; i++) { + ASSERT_TRUE(it2.IsValid()); + EXPECT_STREQ(it2.GetKey(), "bac"); + val = 1; + ASSERT_TRUE(it1.GetValue() != nullptr); + EXPECT_TRUE(!memcmp(it1.GetValue(), &val, sizeof(val))); + val = 4; + ASSERT_TRUE(it2.GetValue() != nullptr); + EXPECT_TRUE(!memcmp(it2.GetValue(), &val, sizeof(val))); + + ASSERT_TRUE(it2.Advance()); + ASSERT_TRUE(it2.IsValid()); + EXPECT_STREQ(it2.GetKey(), "bacd"); + + ASSERT_TRUE(it2.Advance()); + ASSERT_TRUE(it2.IsValid()); + EXPECT_STREQ(it2.GetKey(), "bb"); + + EXPECT_FALSE(it2.Advance()); + EXPECT_FALSE(it2.IsValid()); + + it2.Reset(); + } + + // Get everything under "ab". + IcingDynamicTrie::Iterator it3(trie, "ab"); + for (int i = 0; i < 2; i++) { + ASSERT_TRUE(it3.IsValid()); + EXPECT_STREQ(it3.GetKey(), "ab"); + val = 1; + ASSERT_TRUE(it3.GetValue() != nullptr); + EXPECT_TRUE(!memcmp(it3.GetValue(), &val, sizeof(val))); + + ASSERT_TRUE(it3.Advance()); + ASSERT_TRUE(it3.IsValid()); + EXPECT_STREQ(it3.GetKey(), "abbb"); + + ASSERT_TRUE(it3.Advance()); + ASSERT_TRUE(it3.IsValid()); + EXPECT_STREQ(it3.GetKey(), "abcdefg"); + + ASSERT_TRUE(it3.Advance()); + ASSERT_TRUE(it3.IsValid()); + EXPECT_STREQ(it3.GetKey(), "abd"); + + EXPECT_FALSE(it3.Advance()); + EXPECT_FALSE(it3.IsValid()); + + it3.Reset(); + } + + // Should match only one key exactly. + constexpr std::string_view kOneMatch[] = { + "abd", + "abcd", + "abcdef", + "abcdefg", + }; + // With the following match: + constexpr std::string_view kOneMatchMatched[] = { + "abd", + "abcdefg", + "abcdefg", + "abcdefg", + }; + + for (size_t k = 0; k < ABSL_ARRAYSIZE(kOneMatch); k++) { + IcingDynamicTrie::Iterator it_single(trie, kOneMatch[k].data()); + for (int i = 0; i < 2; i++) { + ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k]; + EXPECT_STREQ(it_single.GetKey(), kOneMatchMatched[k].data()); + EXPECT_FALSE(it_single.Advance()) << kOneMatch[k]; + EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k]; + + it_single.Reset(); + } + } + + // Matches nothing. + constexpr std::string_view kNoMatch[] = { + "abbd", + "abcdeg", + "abcdefh", + }; + for (size_t k = 0; k < ABSL_ARRAYSIZE(kNoMatch); k++) { + IcingDynamicTrie::Iterator it_empty(trie, kNoMatch[k].data()); + for (int i = 0; i < 2; i++) { + EXPECT_FALSE(it_empty.IsValid()); + + it_empty.Reset(); + } + } + + // Clear. + trie.Clear(); + EXPECT_FALSE(IcingDynamicTrie::Iterator(trie, "").IsValid()); + EXPECT_EQ(0u, trie.size()); + EXPECT_EQ(1.0, trie.min_free_fraction()); +} + +TEST_F(IcingDynamicTrieTest, Persistence) { + // Test persistence on the English dictionary. + IcingFilesystem filesystem; + { + // Test with a trie including strings in words. Test will fail if + // words are not unique. + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + EXPECT_FALSE(trie.Init()); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &i)); + } + // Explicitly omit sync. + + StatsDump(trie); + } + + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Init()); + EXPECT_EQ(0U, trie.size()); + + for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &i)); + } + trie.Sync(); + + StatsDump(trie); + } + + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Init()); + + // Make sure we can find everything with the right value. + uint32_t found_count = 0; + uint32_t matched_count = 0; + for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) { + uint32_t val; + bool found = trie.Find(kCommonEnglishWords[i].data(), &val); + if (found) { + found_count++; + if (i == val) { + matched_count++; + } + } + } + EXPECT_EQ(found_count, kCommonEnglishWordArrayLen); + EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen); + + StatsDump(trie); + } +} + +TEST_F(IcingDynamicTrieTest, PersistenceShared) { + // Test persistence on the English dictionary. + IcingFilesystem filesystem; + IcingDynamicTrie::RuntimeOptions ropt; + + { + // Test with a trie including strings in words. Test will fail if + // words are not unique. + ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc; + IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem); + EXPECT_FALSE(trie.Init()); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + uint32_t next_reopen = kCommonEnglishWordArrayLen / 16; + for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &i)); + + if (i == next_reopen) { + ASSERT_NE(0u, trie.UpdateCrc()); + trie.Close(); + ASSERT_TRUE(trie.Init()); + + next_reopen += next_reopen / 2; + } + } + // Explicitly omit sync. Shared should automatically persist. + + StatsDump(trie); + } + + // Go back and forth between the two policies. + for (int i = 0; i < 5; i++) { + if (i % 2 == 0) { + DLOG(INFO) << "Opening with map shared"; + ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc; + } else { + DLOG(INFO) << "Opening with explicit flush"; + ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kExplicitFlush; + } + IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem); + ASSERT_TRUE(trie.Init()); + + // Make sure we can find everything with the right value. + uint32_t found_count = 0; + uint32_t matched_count = 0; + for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) { + uint32_t val; + bool found = trie.Find(kCommonEnglishWords[i].data(), &val); + if (found) { + found_count++; + if (i == val) { + matched_count++; + } + } + } + EXPECT_EQ(found_count, kCommonEnglishWordArrayLen); + EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen); + + StatsDump(trie); + } + + // Clear and re-open. + ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc; + IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem); + ASSERT_TRUE(trie.Init()); + trie.Clear(); + trie.Close(); + ASSERT_TRUE(trie.Init()); +} + +TEST_F(IcingDynamicTrieTest, Sync) { + IcingFilesystem filesystem; + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + for (uint32_t i = 0; i < kNumKeys; i++) { + ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i)); + + uint32_t val; + bool found = trie.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val; + } + + StatsDump(trie); + PrintTrie(trie); + + trie.Sync(); + + for (uint32_t i = 0; i < kNumKeys; i++) { + uint32_t val; + bool found = trie.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val; + } + } + + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Init()); + + for (uint32_t i = 0; i < kNumKeys; i++) { + uint32_t val; + bool found = trie.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val; + } + + StatsDump(trie); + PrintTrie(trie); + } +} + +TEST_F(IcingDynamicTrieTest, LimitsZero) { + // Don't crash if we set limits to 0. + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_FALSE(trie.CreateIfNotExist(IcingDynamicTrie::Options(0, 0, 0, 0))); +} + +TEST_F(IcingDynamicTrieTest, LimitsSmall) { + // Test limits with a few keys. + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist( + IcingDynamicTrie::Options(10, 300, 30, sizeof(uint32_t)))); + ASSERT_TRUE(trie.Init()); + + ASSERT_LT(3U, kNumKeys); + + for (uint32_t i = 0; i < 3; i++) { + ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i)) << i; + + uint32_t val; + bool found = trie.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val; + } + + uint32_t val = 3; + EXPECT_FALSE(trie.Insert(kKeys[3].data(), &val)); + + StatsDump(trie); + PrintTrie(trie); +} + +TEST_F(IcingDynamicTrieTest, DISABLEDFingerprintedKeys) { + IcingFilesystem filesystem; + IcingDynamicTrie::Options options(4 << 20, 4 << 20, 20 << 20, + sizeof(uint32_t)); + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(options)); + ASSERT_TRUE(trie.Init()); + IcingDynamicTrie triefp(trie_files_prefix_ + ".fps", + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(triefp.CreateIfNotExist(options)); + ASSERT_TRUE(triefp.Init()); + + static const uint32_t kNumKeys = 1000000; + std::string key; + for (uint32_t i = 0; i < kNumKeys; i++) { + key.clear(); + IcingStringUtil::SStringAppendF( + &key, 1000, "content://gmail-ls/account/conversation/%u/message/%u", i, + 10 * i); + ASSERT_TRUE(trie.Insert(key.c_str(), &i)); + + // Now compute a fingerprint. + uint64_t fpkey = tc3farmhash::Fingerprint64(key); + + // Convert to base255 since keys in trie cannot contain 0. + uint8_t fpkey_base255[9]; + for (int j = 0; j < 8; j++) { + fpkey_base255[j] = (fpkey % 255) + 1; + fpkey /= 255; + } + fpkey_base255[8] = '\0'; + ASSERT_TRUE( + triefp.Insert(reinterpret_cast<const char*>(fpkey_base255), &i)); + + // Sync periodically to gauge write locality. + if ((i + 1) % (kNumKeys / 10) == 0) { + DLOG(INFO) << "Trie sync"; + trie.Sync(); + DLOG(INFO) << "Trie fp sync"; + triefp.Sync(); + } + } + + DLOG(INFO) << "Trie stats"; + StatsDump(trie); + DLOG(INFO) << "Trie fp stats"; + StatsDump(triefp); +} + +TEST_F(IcingDynamicTrieTest, AddDups) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + static const uint32_t kNumKeys = 5000; + AddToTrie(&trie, kNumKeys); + CheckTrie(trie, kNumKeys); + + DLOG(INFO) << "Trie stats"; + StatsDump(trie); + + AddToTrie(&trie, kNumKeys); + CheckTrie(trie, kNumKeys); + DLOG(INFO) << "Trie stats"; + StatsDump(trie); +} + +TEST_F(IcingDynamicTrieTest, Properties) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + static const uint32_t kOne = 1; + uint32_t val_idx; + trie.Insert("abcd", &kOne, &val_idx, false); + trie.SetProperty(val_idx, 0); + trie.SetProperty(val_idx, 3); + + { + IcingDynamicTrie::PropertyReader reader(trie, 3); + ASSERT_TRUE(reader.Exists()); + EXPECT_TRUE(reader.HasProperty(val_idx)); + EXPECT_FALSE(reader.HasProperty(1000)); + } + + // Disappear after close. + trie.Close(); + ASSERT_TRUE(trie.Init()); + { + IcingDynamicTrie::PropertyReader reader(trie, 3); + EXPECT_FALSE(reader.HasProperty(val_idx)); + } + + // Persist after sync. + trie.Insert("abcd", &kOne, &val_idx, false); + trie.SetProperty(val_idx, 1); + ASSERT_TRUE(trie.Sync()); + trie.Close(); + ASSERT_TRUE(trie.Init()); + + uint32_t val; + ASSERT_TRUE(trie.Find("abcd", &val, &val_idx)); + EXPECT_EQ(1u, val); + { + IcingDynamicTrie::PropertyReader reader(trie, 1); + EXPECT_TRUE(reader.HasProperty(val_idx)); + } + + // Get all. + { + IcingDynamicTrie::PropertyReadersAll readers(trie); + ASSERT_EQ(4u, readers.size()); + EXPECT_TRUE(readers.Exists(0)); + EXPECT_TRUE(readers.Exists(1)); + EXPECT_FALSE(readers.Exists(2)); + EXPECT_TRUE(readers.Exists(3)); + } +} + +TEST_F(IcingDynamicTrieTest, ClearSingleProperty) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + static const uint32_t kOne = 1; + uint32_t val_idx[3]; + trie.Insert("abcd", &kOne, &val_idx[0], false); + trie.SetProperty(val_idx[0], 0); + trie.SetProperty(val_idx[0], 3); + + trie.Insert("efgh", &kOne, &val_idx[1], false); + trie.SetProperty(val_idx[1], 0); + trie.SetProperty(val_idx[1], 3); + + trie.Insert("ijkl", &kOne, &val_idx[2], false); + trie.SetProperty(val_idx[2], 0); + trie.SetProperty(val_idx[2], 3); + + { + IcingDynamicTrie::PropertyReadersAll readers(trie); + ASSERT_EQ(4u, readers.size()); + EXPECT_TRUE(readers.Exists(0)); + EXPECT_FALSE(readers.Exists(1)); + EXPECT_FALSE(readers.Exists(2)); + EXPECT_TRUE(readers.Exists(3)); + for (size_t i = 0; i < readers.size(); i++) { + if (readers.Exists(i)) { + for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) { + EXPECT_TRUE(readers.HasProperty(i, val_idx[j])); + } + } + } + } + + EXPECT_TRUE(trie.ClearPropertyForAllValues(3)); + + { + IcingDynamicTrie::PropertyReadersAll readers(trie); + ASSERT_EQ(4u, readers.size()); + EXPECT_TRUE(readers.Exists(0)); + EXPECT_FALSE(readers.Exists(1)); + EXPECT_FALSE(readers.Exists(2)); + // Clearing the property causes all values to be deleted. + EXPECT_FALSE(readers.Exists(3)); + for (size_t i = 0; i < readers.size(); i++) { + for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) { + if (i == 0) { + EXPECT_TRUE(readers.HasProperty(i, val_idx[j])); + } else { + EXPECT_FALSE(readers.HasProperty(i, val_idx[j])); + } + } + } + } +} + +TEST_F(IcingDynamicTrieTest, Compact) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + for (uint32_t i = 0; i < kNumKeys; i++) { + ASSERT_TRUE(trie.Insert(kKeys[i].data(), &i)); + + uint32_t val; + bool found = trie.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val; + } + + EXPECT_EQ(trie.size(), kNumKeys); + + StatsDump(trie); + PrintTrie(trie); + + IcingDynamicTrie trie2(trie_files_prefix_ + "-2", + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie2.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie2.Init()); + + std::unordered_map<uint32_t, uint32_t> old_to_new_tvi; + trie.Compact(NewValueMap(), &trie2, &old_to_new_tvi); + for (uint32_t i = 0; i < kNumKeys; i++) { + uint32_t val; + bool found = trie2.Find(kKeys[i].data(), &val); + EXPECT_TRUE(found) << kKeys[i]; + EXPECT_TRUE(old_to_new_tvi.find(val) != old_to_new_tvi.end()); + } +} + +} // namespace + +// The tests below are accessing private methods and fields of IcingDynamicTrie +// so can't be in the anonymous namespace. + +TEST_F(IcingDynamicTrieTest, TrieShouldRespectLimits) { + // Test limits on numbers of nodes, nexts, and suffixes size. + IcingFilesystem filesystem; + + // These 3 numbers are the entities we need in order to insert all the test + // words before the last one. + uint32_t num_nodes_enough; + uint32_t num_nexts_enough; + uint32_t suffixes_size_enough; + + // First, try to fill the 3 numbers above. + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Remove()); + // Creates a trie with enough numbers of nodes, nexts, and suffix file size. + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options( + /*max_nodes_in=*/1000, /*max_nexts_in=*/1000, + /*max_suffixes_size_in=*/1000, sizeof(uint32_t)))); + ASSERT_TRUE(trie.Init()); + + // Inserts all the test words before the last one. + uint32_t value = 0; + for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value)); + } + + IcingDynamicTrieHeader header; + trie.GetHeader(&header); + + // Before each insertion, it requires that there're (2 + 1 + key_length) + // nodes left, so we need 8 nodes to insert the last word. +7 here will make + // it just enough to insert the word before the last one. + num_nodes_enough = header.num_nodes() + 7; + + // Before each insertion, it requires that there're (2 + 1 + key_length + + // kMaxNextArraySize) nexts left, so we need (8 + kMaxNextArraySize) nexts + // to insert the last word. (7 + kMaxNextArraySize) here will make it just + // enough to insert the word before the last one. + num_nexts_enough = + header.num_nexts() + 7 + IcingDynamicTrie::kMaxNextArraySize; + + // Before each insertion, it requires that there're (1 + key_length + + // value_size) bytes left for suffixes, so we need (6 + sizeof(uint32_t)) + // bytes to insert the last word. (5 + sizeof(uint32_t)) here will make it + // just enough to insert the word before the last one. + suffixes_size_enough = header.suffixes_size() + 5 + sizeof(uint32_t); + } + + // Test a trie with just enough number of nodes. + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Remove()); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options( + num_nodes_enough, /*max_nexts_in=*/1000, + /*max_suffixes_size_in=*/1000, sizeof(uint32_t)))); + ASSERT_TRUE(trie.Init()); + + // Inserts all the test words before the last one. + uint32_t value = 0; + for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value)); + } + + // Fails to insert the last word because no enough nodes left. + EXPECT_FALSE(trie.Insert( + kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(), &value)); + } + + // Test a trie with just enough number of nexts. + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Remove()); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options( + /*max_nodes_in=*/1000, num_nexts_enough, + /*max_suffixes_size_in=*/1000, sizeof(uint32_t)))); + ASSERT_TRUE(trie.Init()); + + // Inserts all the test words before the last one. + uint32_t value = 0; + for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value)); + } + + // Fails to insert the last word because no enough nexts left. + EXPECT_FALSE(trie.Insert( + kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(), &value)); + } + + // Test a trie with just enough suffixes size. + { + IcingDynamicTrie trie(trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions(), &filesystem); + ASSERT_TRUE(trie.Remove()); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options( + /*max_nodes_in=*/1000, /*max_nexts_in=*/1000, suffixes_size_enough, + sizeof(uint32_t)))); + ASSERT_TRUE(trie.Init()); + + // Inserts all the test words before the last one. + uint32_t value = 0; + for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) { + ASSERT_TRUE(trie.Insert(kCommonEnglishWords[i].data(), &value)); + } + + // Fails to insert the last word because no enough space for more suffixes. + EXPECT_FALSE(trie.Insert( + kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(), &value)); + } +} + +TEST_F(IcingDynamicTrieTest, SyncErrorRecovery) { + IcingFilesystem filesystem; + IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + + static const uint32_t kNumKeys = 5000; + AddToTrie(&trie, kNumKeys); + CheckTrie(trie, kNumKeys); + + trie.Sync(); + trie.Close(); + + // Reach into the file and set the value_size. + ASSERT_TRUE(trie.Init()); + IcingDynamicTrieHeader hdr; + trie.GetHeader(&hdr); + hdr.set_value_size(hdr.value_size() + 123); + trie.SetHeader(hdr); + trie.Close(); + + ASSERT_FALSE(trie.Init()); +} + +TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) { + // Create trie with one property. + IcingFilesystem filesystem; + IcingDynamicTrie trie( + trie_files_prefix_, + IcingDynamicTrie::RuntimeOptions().set_storage_policy( + IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc), + &filesystem); + ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options())); + ASSERT_TRUE(trie.Init()); + ASSERT_TRUE(trie.deleted_bitmap_); + trie.SetProperty(0, 0); + ASSERT_EQ(1, trie.property_bitmaps_.size()); + ASSERT_TRUE(trie.property_bitmaps_[0]); + trie.Close(); + + // Intentionally corrupt deleted_bitmap file to make Init() fail. + FILE* fp = fopen(trie.deleted_bitmap_filename_.c_str(), "r+"); + ASSERT_TRUE(fp); + ASSERT_EQ(16, fwrite("################", 1, 16, fp)); + fclose(fp); + ASSERT_FALSE(trie.Init()); + + // Check that both the bitmap and the property files have been closed. + ASSERT_FALSE(trie.deleted_bitmap_); + ASSERT_EQ(0, trie.property_bitmaps_.size()); +} + +} // namespace lib +} // namespace icing diff --git a/icing/legacy/index/icing-filesystem.h b/icing/legacy/index/icing-filesystem.h index 2b10c1c..f645632 100644 --- a/icing/legacy/index/icing-filesystem.h +++ b/icing/legacy/index/icing-filesystem.h @@ -17,13 +17,15 @@ #ifndef ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_ #define ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_ -#include <stdint.h> -#include <stdio.h> -#include <string.h> +#include <sys/types.h> +#include <cstddef> +#include <cstdint> +#include <cstdio> #include <memory> #include <string> #include <unordered_set> +#include <utility> #include <vector> namespace icing { diff --git a/icing/legacy/index/icing-mmapper.h b/icing/legacy/index/icing-mmapper.h index bf62aa5..d054c11 100644 --- a/icing/legacy/index/icing-mmapper.h +++ b/icing/legacy/index/icing-mmapper.h @@ -22,9 +22,11 @@ #ifndef ICING_LEGACY_INDEX_ICING_MMAPPER_H_ #define ICING_LEGACY_INDEX_ICING_MMAPPER_H_ -#include <stdint.h> #include <unistd.h> +#include <cstddef> +#include <cstdint> + namespace icing { namespace lib { diff --git a/icing/legacy/index/icing-storage.h b/icing/legacy/index/icing-storage.h index cc06c54..58b6aa1 100644 --- a/icing/legacy/index/icing-storage.h +++ b/icing/legacy/index/icing-storage.h @@ -20,6 +20,7 @@ #ifndef ICING_LEGACY_INDEX_ICING_STORAGE_H_ #define ICING_LEGACY_INDEX_ICING_STORAGE_H_ +#include <cstdint> #include <string> namespace icing { diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc new file mode 100644 index 0000000..0e2eb2a --- /dev/null +++ b/icing/testing/hit-test-utils.cc @@ -0,0 +1,58 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/testing/hit-test-utils.h" + +namespace icing { +namespace lib { + +// Returns a hit that has a delta of desired_byte_length from last_hit. +Hit CreateHit(Hit last_hit, int desired_byte_length) { + Hit hit = + (last_hit.section_id() == kMinSectionId) + ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score()) + : Hit(last_hit.section_id() - 1, last_hit.document_id(), + last_hit.score()); + uint8_t buf[5]; + while (VarInt::Encode(last_hit.value() - hit.value(), buf) < + desired_byte_length) { + hit = (hit.section_id() == kMinSectionId) + ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score()) + : Hit(hit.section_id() - 1, hit.document_id(), hit.score()); + } + return hit; +} + +// Returns a vector of num_hits Hits with the first hit starting at start_docid +// and with 1-byte deltas. +std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits, + int desired_byte_length) { + std::vector<Hit> hits; + if (num_hits < 1) { + return hits; + } + hits.push_back( + Hit(/*section_id=*/1, /*document_id=*/start_docid, Hit::kMaxHitScore)); + while (hits.size() < num_hits) { + hits.push_back(CreateHit(hits.back(), desired_byte_length)); + } + return hits; +} + +std::vector<Hit> CreateHits(int num_hits, int desired_byte_length) { + return CreateHits(/*start_docid=*/0, num_hits, desired_byte_length); +} + +} // namespace lib +} // namespace icing diff --git a/icing/testing/hit-test-utils.h b/icing/testing/hit-test-utils.h new file mode 100644 index 0000000..e236ec0 --- /dev/null +++ b/icing/testing/hit-test-utils.h @@ -0,0 +1,43 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_TESTING_HIT_TEST_UTILS_H_ +#define ICING_TESTING_HIT_TEST_UTILS_H_ + +#include <vector> + +#include "icing/index/hit/hit.h" +#include "icing/legacy/index/icing-bit-util.h" +#include "icing/schema/section.h" +#include "icing/store/document-id.h" + +namespace icing { +namespace lib { + +// Returns a hit that has a delta of desired_byte_length from last_hit. +Hit CreateHit(Hit last_hit, int desired_byte_length); + +// Returns a vector of num_hits Hits with the first hit starting at start_docid +// and with desired_byte_length deltas. +std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits, + int desired_byte_length); + +// Returns a vector of num_hits Hits with the first hit starting at 0 and each +// with desired_byte_length deltas. +std::vector<Hit> CreateHits(int num_hits, int desired_byte_length); + +} // namespace lib +} // namespace icing + +#endif // ICING_TESTING_HIT_TEST_UTILS_H_ diff --git a/icing/text_classifier/lib3/utils/base/logging.h b/icing/text_classifier/lib3/utils/base/logging.h index bf02f65..92d775e 100644 --- a/icing/text_classifier/lib3/utils/base/logging.h +++ b/icing/text_classifier/lib3/utils/base/logging.h @@ -22,7 +22,6 @@ #include "icing/text_classifier/lib3/utils/base/logging_levels.h" #include "icing/text_classifier/lib3/utils/base/port.h" - namespace libtextclassifier3 { namespace logging { diff --git a/icing/text_classifier/lib3/utils/base/statusor.h b/icing/text_classifier/lib3/utils/base/statusor.h index f5fae7a..9ec3d91 100644 --- a/icing/text_classifier/lib3/utils/base/statusor.h +++ b/icing/text_classifier/lib3/utils/base/statusor.h @@ -86,6 +86,8 @@ class StatusOr { // Conversion assignment operator, T must be assignable from U template <typename U> inline StatusOr& operator=(const StatusOr<U>& other); + template <typename U> + inline StatusOr& operator=(StatusOr<U>&& other); inline ~StatusOr(); @@ -134,6 +136,40 @@ class StatusOr { friend class StatusOr; private: + void Clear() { + if (ok()) { + value_.~T(); + } + } + + // Construct the value through placement new with the passed argument. + template <typename... Arg> + void MakeValue(Arg&&... arg) { + new (&value_) T(std::forward<Arg>(arg)...); + } + + // Creates a valid instance of type T constructed with U and assigns it to + // value_. Handles how to properly assign to value_ if value_ was never + // actually initialized (if this is currently non-OK). + template <typename U> + void AssignValue(U&& value) { + if (ok()) { + value_ = std::forward<U>(value); + } else { + MakeValue(std::forward<U>(value)); + status_ = Status::OK; + } + } + + // Creates a status constructed with U and assigns it to status_. It also + // properly destroys value_ if this is OK and value_ represents a valid + // instance of T. + template <typename U> + void AssignStatus(U&& v) { + Clear(); + status_ = static_cast<Status>(std::forward<U>(v)); + } + Status status_; // The members of unions do not require initialization and are not destructed // unless specifically called. This allows us to construct instances of @@ -210,35 +246,47 @@ inline StatusOr<T>::StatusOr(U&& value) : StatusOr(T(std::forward<U>(value))) {} template <typename T> inline StatusOr<T>& StatusOr<T>::operator=(const StatusOr& other) { - status_ = other.status_; - if (status_.ok()) { - value_ = other.value_; + if (other.ok()) { + AssignValue(other.value_); + } else { + AssignStatus(other.status_); } return *this; } template <typename T> inline StatusOr<T>& StatusOr<T>::operator=(StatusOr&& other) { - status_ = other.status_; - if (status_.ok()) { - value_ = std::move(other.value_); + if (other.ok()) { + AssignValue(std::move(other.value_)); + } else { + AssignStatus(std::move(other.status_)); } return *this; } template <typename T> inline StatusOr<T>::~StatusOr() { - if (ok()) { - value_.~T(); - } + Clear(); } template <typename T> template <typename U> inline StatusOr<T>& StatusOr<T>::operator=(const StatusOr<U>& other) { - status_ = other.status_; - if (status_.ok()) { - value_ = other.value_; + if (other.ok()) { + AssignValue(other.value_); + } else { + AssignStatus(other.status_); + } + return *this; +} + +template <typename T> +template <typename U> +inline StatusOr<T>& StatusOr<T>::operator=(StatusOr<U>&& other) { + if (other.ok()) { + AssignValue(std::move(other.value_)); + } else { + AssignStatus(std::move(other.status_)); } return *this; } |