// Copyright (C) 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "icing/index/main/posting-list-hit-serializer.h" #include #include #include #include #include #include #include #include "icing/text_classifier/lib3/utils/base/status.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/file/posting_list/posting-list-used.h" #include "icing/index/hit/hit.h" #include "icing/legacy/index/icing-bit-util.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/hit-test-utils.h" using testing::ElementsAre; using testing::ElementsAreArray; using testing::Eq; using testing::Gt; using testing::IsEmpty; using testing::IsFalse; using testing::IsTrue; using testing::Le; using testing::Lt; namespace icing { namespace lib { namespace { struct HitElt { HitElt() = default; explicit HitElt(const Hit &hit_in) : hit(hit_in) {} static Hit get_hit(const HitElt &hit_elt) { return hit_elt.hit; } Hit hit; }; TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) { PostingListHitSerializer serializer; static const int kNumHits = 2551; static const size_t kHitsSize = kNumHits * sizeof(Hit); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize)); // Make used. Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/56, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0)); // Size = sizeof(uncompressed hit0::Value) // + sizeof(hit0::Flags) // + sizeof(hit0::TermFrequency) int expected_size = sizeof(Hit::Value) + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0))); Hit hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); uint8_t delta_buf[VarInt::kMaxEncodedLen64]; size_t delta_len = PostingListHitSerializer::EncodeNextHitValue( /*next_hit_value=*/hit1.value(), /*curr_hit_value=*/hit0.value(), delta_buf); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1)); // Size = sizeof(uncompressed hit1::Value) // + sizeof(hit0-hit1) // + sizeof(hit0::Flags) // + sizeof(hit0::TermFrequency) expected_size += delta_len; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit1, hit0))); Hit hit2(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/56, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); delta_len = PostingListHitSerializer::EncodeNextHitValue( /*next_hit_value=*/hit2.value(), /*curr_hit_value=*/hit1.value(), delta_buf); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2)); // Size = sizeof(uncompressed hit2::Value) + sizeof(hit2::Flags) // + sizeof(hit2::TermFrequency) // + sizeof(hit1-hit2) // + sizeof(hit0-hit1) // + sizeof(hit0::flags) // + sizeof(hit0::term_frequency) expected_size += delta_len + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit2, hit1, hit0))); Hit hit3(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); delta_len = PostingListHitSerializer::EncodeNextHitValue( /*next_hit_value=*/hit3.value(), /*curr_hit_value=*/hit2.value(), delta_buf); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3)); // Size = sizeof(uncompressed hit3::Value) // + sizeof(hit2-hit3) + sizeof(hit2::Flags) // + sizeof(hit2::TermFrequency) // + sizeof(hit1-hit2) // + sizeof(hit0-hit1) // + sizeof(hit0::flags) // + sizeof(hit0::term_frequency) expected_size += delta_len; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0))); } TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull_withFlags) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Fill up the compressed region. // Transitions: // Adding hit0: EMPTY -> NOT_FULL // Adding hit1: NOT_FULL -> NOT_FULL // Adding hit2: NOT_FULL -> NOT_FULL Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3); Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2, /*term_frequency=*/57, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); EXPECT_THAT(hit2.has_flags(), IsTrue()); EXPECT_THAT(hit2.has_term_frequency(), IsTrue()); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2)); // Size used will be 4 (hit2::Value) + 1 (hit2::Flags) + 1 // (hit2::TermFrequency) + 2 (hit1-hit2) + 3 (hit0-hit1) = 11 bytes int expected_size = sizeof(Hit::Value) + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency) + 2 + 3; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit2, hit1, hit0))); // Add one more hit to transition NOT_FULL -> ALMOST_FULL Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); EXPECT_THAT(hit3.has_flags(), IsFalse()); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3)); // Storing them in the compressed region requires 4 (hit3::Value) + 3 // (hit2-hit3) + 1 (hit2::Flags) + 1 (hit2::TermFrequency) + 2 (hit1-hit2) + 3 // (hit0-hit1) = 14 bytes, but there are only 12 bytes in the compressed // region. So instead, the posting list will transition to ALMOST_FULL. // The in-use compressed region will actually shrink from 11 bytes to 10 bytes // because the uncompressed version of hit2 will be overwritten with the // compressed delta of hit2. hit3 will be written to one of the special hits. // Because we're in ALMOST_FULL, the expected size is the size of the pl minus // the one hit used to mark the posting list as ALMOST_FULL. expected_size = pl_size - sizeof(Hit); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0))); // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4)); // There are currently 10 bytes in use in the compressed region. hit3 will // have a 2-byte delta, which fits in the compressed region. Hit3 will be // moved from the special hit to the compressed region (which will have 12 // bytes in use after adding hit3). Hit4 will be placed in one of the special // hits and the posting list will remain in ALMOST_FULL. EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0))); // Add one more hit to transition ALMOST_FULL -> FULL Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5)); // There are currently 12 bytes in use in the compressed region. hit4 will // have a 2-byte delta which will not fit in the compressed region. So hit4 // will remain in one of the special hits and hit5 will occupy the other, // making the posting list FULL. EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0))); // The posting list is FULL. Adding another hit should fail. Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1); EXPECT_THAT(serializer.PrependHit(&pl_used, hit6), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Fill up the compressed region. // Transitions: // Adding hit0: EMPTY -> NOT_FULL // Adding hit1: NOT_FULL -> NOT_FULL // Adding hit2: NOT_FULL -> NOT_FULL Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3); Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/3); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2)); // Size used will be 4 (hit2::Value) + 3 (hit1-hit2) + 3 (hit0-hit1) // = 10 bytes int expected_size = sizeof(Hit::Value) + 3 + 3; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit2, hit1, hit0))); // Add one more hit to transition NOT_FULL -> ALMOST_FULL Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3)); // Storing them in the compressed region requires 4 (hit3::Value) + 3 // (hit2-hit3) + 3 (hit1-hit2) + 3 (hit0-hit1) = 13 bytes, but there are only // 12 bytes in the compressed region. So instead, the posting list will // transition to ALMOST_FULL. // The in-use compressed region will actually shrink from 10 bytes to 9 bytes // because the uncompressed version of hit2 will be overwritten with the // compressed delta of hit2. hit3 will be written to one of the special hits. // Because we're in ALMOST_FULL, the expected size is the size of the pl minus // the one hit used to mark the posting list as ALMOST_FULL. expected_size = pl_size - sizeof(Hit); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0))); // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4)); // There are currently 9 bytes in use in the compressed region. Hit3 will // have a 2-byte delta, which fits in the compressed region. Hit3 will be // moved from the special hit to the compressed region (which will have 11 // bytes in use after adding hit3). Hit4 will be placed in one of the special // hits and the posting list will remain in ALMOST_FULL. EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0))); // Add one more hit to transition ALMOST_FULL -> FULL Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5)); // There are currently 11 bytes in use in the compressed region. Hit4 will // have a 2-byte delta which will not fit in the compressed region. So hit4 // will remain in one of the special hits and hit5 will occupy the other, // making the posting list FULL. EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0))); // The posting list is FULL. Adding another hit should fail. Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1); EXPECT_THAT(serializer.PrependHit(&pl_used, hit6), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } TEST(PostingListHitSerializerTest, PrependHitsWithSameValue) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Fill up the compressed region. Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3); Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2, /*term_frequency=*/57, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); // Create hit3 with the same value but different flags as hit2 (hit3_flags // is set to have all currently-defined flags enabled) Hit::Flags hit3_flags = 0; for (int i = 0; i < Hit::kNumFlagsInFlagsField; ++i) { hit3_flags |= (1 << i); } Hit hit3(hit2.value(), /*term_frequency=*/hit2.term_frequency(), /*flags=*/hit3_flags); // hit3 is larger than hit2 (its flag value is larger), and so needs to be // prepended first ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3)); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2)); // Posting list is now ALMOST_FULL // ---------------------- // 23-21 delta(Hit #0) // 20-19 delta(Hit #1) // 18 term-frequency(Hit #2) // 17 flags(Hit #2) // 16 delta(Hit #2) = 0 // 15-12 // 11-6 Hit #3 // 5-0 kSpecialHit // ---------------------- int bytes_used = pl_size - sizeof(Hit); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit2, hit3, hit1, hit0))); } TEST(PostingListHitSerializerTest, PostingListUsedMinSize) { PostingListHitSerializer serializer; // Min size = 12 ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion( &serializer, serializer.GetMinPostingListSize())); // PL State: EMPTY EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty())); // Add a hit, PL should shift to ALMOST_FULL state Hit hit0(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/0, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0)); // Size = sizeof(uncompressed hit0) int expected_size = sizeof(Hit); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0))); // Add the smallest hit possible - no term_frequency, non-prefix hit and a // delta of 0b10. PL should shift to FULL state. Hit hit1(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/0, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1)); // Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0) expected_size += sizeof(Hit); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit1, hit0))); // Try to add the smallest hit possible. Should fail Hit hit2(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/0, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); EXPECT_THAT(serializer.PrependHit(&pl_used, hit2), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size)); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit1, hit0))); } TEST(PostingListHitSerializerTest, PostingListPrependHitArrayMinSizePostingList) { PostingListHitSerializer serializer; // Min Size = 12 int pl_size = serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits_in; hits_in.emplace_back(Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); std::reverse(hits_in.begin(), hits_in.end()); // Add five hits. The PL is in the empty state and an empty min size PL can // only fit two hits. So PrependHitArray should fail. ICING_ASSERT_OK_AND_ASSIGN( uint32_t num_can_prepend, (serializer.PrependHitArray( &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false))); EXPECT_THAT(num_can_prepend, Eq(2)); int can_fit_hits = num_can_prepend; // The PL has room for 2 hits. We should be able to add them without any // problem, transitioning the PL from EMPTY -> ALMOST_FULL -> FULL const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2); ICING_ASSERT_OK_AND_ASSIGN( num_can_prepend, (serializer.PrependHitArray( &pl_used, hits_in_ptr, can_fit_hits, /*keep_prepended=*/false))); EXPECT_THAT(num_can_prepend, Eq(can_fit_hits)); EXPECT_THAT(pl_size, Eq(serializer.GetBytesUsed(&pl_used))); std::deque hits_pushed; std::transform(hits_in.rbegin(), hits_in.rend() - hits_in.size() + can_fit_hits, std::front_inserter(hits_pushed), HitElt::get_hit); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); } TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) { PostingListHitSerializer serializer; // Size = 36 int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits_in; hits_in.emplace_back(Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); std::reverse(hits_in.begin(), hits_in.end()); // The last hit is uncompressed and the four before it should only take one // byte. Total use = 8 bytes. // ---------------------- // 35 delta(Hit #0) // 34 delta(Hit #1) // 33 delta(Hit #2) // 32 delta(Hit #3) // 31-28 Hit #4 // 27-12 // 11-6 kSpecialHit // 5-0 Offset=28 // ---------------------- int byte_size = sizeof(Hit::Value) + hits_in.size() - 1; // Add five hits. The PL is in the empty state and should be able to fit all // five hits without issue, transitioning the PL from EMPTY -> NOT_FULL. ICING_ASSERT_OK_AND_ASSIGN( uint32_t num_could_fit, (serializer.PrependHitArray( &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false))); EXPECT_THAT(num_could_fit, Eq(hits_in.size())); EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used))); std::deque hits_pushed; std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); Hit first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1); hits_in.clear(); hits_in.emplace_back(first_hit); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3)); std::reverse(hits_in.begin(), hits_in.end()); // Size increased by the deltas of these hits (1+2+1+2+3+2+3) = 14 bytes // ---------------------- // 35 delta(Hit #0) // 34 delta(Hit #1) // 33 delta(Hit #2) // 32 delta(Hit #3) // 31 delta(Hit #4) // 30-29 delta(Hit #5) // 28 delta(Hit #6) // 27-26 delta(Hit #7) // 25-23 delta(Hit #8) // 22-21 delta(Hit #9) // 20-18 delta(Hit #10) // 17-14 Hit #11 // 13-12 // 11-6 kSpecialHit // 5-0 Offset=14 // ---------------------- byte_size += 14; // Add these 7 hits. The PL is currently in the NOT_FULL state and should // remain in the NOT_FULL state. ICING_ASSERT_OK_AND_ASSIGN( num_could_fit, (serializer.PrependHitArray( &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false))); EXPECT_THAT(num_could_fit, Eq(hits_in.size())); EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used))); // All hits from hits_in were added. std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/3); hits_in.clear(); hits_in.emplace_back(first_hit); // ---------------------- // 35 delta(Hit #0) // 34 delta(Hit #1) // 33 delta(Hit #2) // 32 delta(Hit #3) // 31 delta(Hit #4) // 30-29 delta(Hit #5) // 28 delta(Hit #6) // 27-26 delta(Hit #7) // 25-23 delta(Hit #8) // 22-21 delta(Hit #9) // 20-18 delta(Hit #10) // 17-15 delta(Hit #11) // 14-12 // 11-6 Hit #12 // 5-0 kSpecialHit // ---------------------- byte_size = 30; // 36 - 6 // Add this 1 hit. The PL is currently in the NOT_FULL state and should // transition to the ALMOST_FULL state - even though there is still some // unused space. This is because the unused space (3 bytes) is less than // the size of a uncompressed Hit. ICING_ASSERT_OK_AND_ASSIGN( num_could_fit, (serializer.PrependHitArray( &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false))); EXPECT_THAT(num_could_fit, Eq(hits_in.size())); EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used))); // All hits from hits_in were added. std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1); hits_in.clear(); hits_in.emplace_back(first_hit); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3)); std::reverse(hits_in.begin(), hits_in.end()); // ---------------------- // 35 delta(Hit #0) // 34 delta(Hit #1) // 33 delta(Hit #2) // 32 delta(Hit #3) // 31 delta(Hit #4) // 30-29 delta(Hit #5) // 28 delta(Hit #6) // 27-26 delta(Hit #7) // 25-23 delta(Hit #8) // 22-21 delta(Hit #9) // 20-18 delta(Hit #10) // 17-15 delta(Hit #11) // 14 delta(Hit #12) // 13-12 // 11-6 Hit #13 // 5-0 Hit #14 // ---------------------- // Add these 2 hits. // - The PL is currently in the ALMOST_FULL state. Adding the first hit should // keep the PL in ALMOST_FULL because the delta between // Hit #13 and Hit #14 (1 byte) can fit in the unused area (3 bytes). // - Adding the second hit should transition to the FULL state because the // delta between Hit #14 and Hit #15 (3 bytes) is larger than the remaining // unused area (2 byte). ICING_ASSERT_OK_AND_ASSIGN( num_could_fit, (serializer.PrependHitArray( &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false))); EXPECT_THAT(num_could_fit, Eq(hits_in.size())); EXPECT_THAT(pl_size, Eq(serializer.GetBytesUsed(&pl_used))); // All hits from hits_in were added. std::transform(hits_in.rbegin(), hits_in.rend(), std::front_inserter(hits_pushed), HitElt::get_hit); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); } TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) { PostingListHitSerializer serializer; static constexpr int kNumHits = 128; static constexpr int kDeltaSize = 1; static constexpr size_t kHitsSize = ((kNumHits - 2) * kDeltaSize + (2 * sizeof(Hit))) / sizeof(Hit) * sizeof(Hit); // Create an array with one too many hits std::vector hits_in_too_many = CreateHits(kNumHits + 1, /*desired_byte_length=*/1); std::vector hit_elts_in_too_many; for (const Hit &hit : hits_in_too_many) { hit_elts_in_too_many.emplace_back(hit); } // Reverse so that hits are inserted in descending order std::reverse(hit_elts_in_too_many.begin(), hit_elts_in_too_many.end()); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion( &serializer, serializer.GetMinPostingListSize())); // PrependHitArray should fail because hit_elts_in_too_many is far too large // for the minimum size pl. ICING_ASSERT_OK_AND_ASSIGN( uint32_t num_could_fit, (serializer.PrependHitArray( &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), /*keep_prepended=*/false))); ASSERT_THAT(num_could_fit, Eq(2)); ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size())); ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0)); ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty())); ICING_ASSERT_OK_AND_ASSIGN( pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize)); // PrependHitArray should fail because hit_elts_in_too_many is one hit too // large for this pl. ICING_ASSERT_OK_AND_ASSIGN( num_could_fit, (serializer.PrependHitArray( &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(), /*keep_prepended=*/false))); ASSERT_THAT(num_could_fit, Eq(hit_elts_in_too_many.size() - 1)); ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0)); ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty())); } TEST(PostingListHitSerializerTest, PostingListStatusJumpFromNotFullToFullAndBack) { PostingListHitSerializer serializer; // Size = 18 const uint32_t pl_size = 3 * sizeof(Hit); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); Hit max_valued_hit(kMaxSectionId, kMinDocumentId, Hit::kMaxTermFrequency, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); ICING_ASSERT_OK(serializer.PrependHit(&pl, max_valued_hit)); uint32_t bytes_used = serializer.GetBytesUsed(&pl); ASSERT_THAT(bytes_used, sizeof(Hit::Value) + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency)); // Status not full. ASSERT_THAT(bytes_used, Le(pl_size - PostingListHitSerializer::kSpecialHitsSize)); Hit min_valued_hit(kMinSectionId, kMaxDocumentId, Hit::kMaxTermFrequency, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); uint8_t delta_buf[VarInt::kMaxEncodedLen64]; size_t delta_len = PostingListHitSerializer::EncodeNextHitValue( /*next_hit_value=*/min_valued_hit.value(), /*curr_hit_value=*/max_valued_hit.value(), delta_buf); // The compressed region available is pl_size - 2 * sizeof(specialHits) = 6 // We need to also fit max_valued_hit's flags and term-frequency fields, which // each take 1 byte // So we'll jump directly to FULL if the varint-encoded delta of the 2 hits > // 6 - sizeof(Hit::Flags) - sizeof(Hit::TermFrequency) = 4 ASSERT_THAT(delta_len, Gt(4)); ICING_ASSERT_OK(serializer.PrependHit( &pl, Hit(kMinSectionId, kMaxDocumentId, Hit::kMaxTermFrequency, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true))); // Status should jump to full directly. ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(pl_size)); ICING_ASSERT_OK(serializer.PopFrontHits(&pl, 1)); // Status should return to not full as before. ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(bytes_used)); } TEST(PostingListHitSerializerTest, DeltaOverflow) { PostingListHitSerializer serializer; const uint32_t pl_size = 4 * sizeof(Hit); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); static const Hit::Value kMaxHitValue = std::numeric_limits::max(); static const Hit::Value kOverflow[4] = { kMaxHitValue >> 2, (kMaxHitValue >> 2) * 2, (kMaxHitValue >> 2) * 3, kMaxHitValue - 1, }; // Fit at least 4 ordinary values. for (Hit::Value v = 0; v < 4; v++) { ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(4 - v))); } // Cannot fit 4 overflow values. ICING_ASSERT_OK_AND_ASSIGN( pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); Hit::Flags has_term_frequency_flags = 0b1; ICING_EXPECT_OK(serializer.PrependHit( &pl, Hit(/*value=*/kOverflow[3], has_term_frequency_flags, /*term_frequency=*/8))); ICING_EXPECT_OK(serializer.PrependHit( &pl, Hit(/*value=*/kOverflow[2], has_term_frequency_flags, /*term_frequency=*/8))); // Can fit only one more. ICING_EXPECT_OK(serializer.PrependHit( &pl, Hit(/*value=*/kOverflow[1], has_term_frequency_flags, /*term_frequency=*/8))); EXPECT_THAT(serializer.PrependHit( &pl, Hit(/*value=*/kOverflow[0], has_term_frequency_flags, /*term_frequency=*/8)), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } TEST(PostingListHitSerializerTest, GetMinPostingListToFitForNotFullPL) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Create and add some hits to make pl_used NOT_FULL std::vector hits_in = CreateHits(/*num_hits=*/7, /*desired_byte_length=*/1); for (const Hit &hit : hits_in) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit)); } // ---------------------- // 23 delta(Hit #0) // 22 delta(Hit #1) // 21 delta(Hit #2) // 20 delta(Hit #3) // 19 delta(Hit #4) // 18 delta(Hit #5) // 17-14 Hit #6 // 13-12 // 11-6 kSpecialHit // 5-0 Offset=14 // ---------------------- int bytes_used = 10; // Check that all hits have been inserted EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); std::deque hits_pushed(hits_in.rbegin(), hits_in.rend()); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); // Get the min size to fit for the hits in pl_used. Moving the hits in pl_used // into a posting list with this min size should make it ALMOST_FULL, which we // can see should have size = 18. // ---------------------- // 17 delta(Hit #0) // 16 delta(Hit #1) // 15 delta(Hit #2) // 14 delta(Hit #3) // 13 delta(Hit #4) // 12 delta(Hit #5) // 11-6 Hit #6 // 5-0 kSpecialHit // ---------------------- int expected_min_size = 18; uint32_t min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used); EXPECT_THAT(min_size_to_fit, Eq(expected_min_size)); // Also check that this min size to fit posting list actually does fit all the // hits and can only hit one more hit in the ALMOST_FULL state. ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed min_size_to_fit_pl, PostingListUsed::CreateFromUnitializedRegion( &serializer, min_size_to_fit)); for (const Hit &hit : hits_in) { ICING_ASSERT_OK(serializer.PrependHit(&min_size_to_fit_pl, hit)); } // Adding another hit to the min-size-to-fit posting list should succeed Hit hit = CreateHit(hits_in.back(), /*desired_byte_length=*/1); ICING_ASSERT_OK(serializer.PrependHit(&min_size_to_fit_pl, hit)); // Adding any other hits should fail with RESOURCE_EXHAUSTED error. EXPECT_THAT(serializer.PrependHit(&min_size_to_fit_pl, CreateHit(hit, /*desired_byte_length=*/1)), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); // Check that all hits have been inserted and the min-fit posting list is now // FULL. EXPECT_THAT(serializer.GetBytesUsed(&min_size_to_fit_pl), Eq(min_size_to_fit)); hits_pushed.emplace_front(hit); EXPECT_THAT(serializer.GetHits(&min_size_to_fit_pl), IsOkAndHolds(ElementsAreArray(hits_pushed))); } TEST(PostingListHitSerializerTest, GetMinPostingListToFitForTwoHits) { PostingListHitSerializer serializer; // Size = 36 int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Create and add 2 hits Hit first_hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); std::vector hits_in = CreateHits(first_hit, /*num_hits=*/2, /*desired_byte_length=*/4); for (const Hit &hit : hits_in) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit)); } // ---------------------- // 35 term-frequency(Hit #0) // 34 flags(Hit #0) // 33-30 delta(Hit #0) // 29 term-frequency(Hit #1) // 28 flags(Hit #1) // 27-24 Hit #1 // 23-12 // 11-6 kSpecialHit // 5-0 Offset=24 // ---------------------- int bytes_used = 12; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); std::deque hits_pushed(hits_in.rbegin(), hits_in.rend()); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); // GetMinPostingListSizeToFit should return min posting list size. EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(serializer.GetMinPostingListSize())); } TEST(PostingListHitSerializerTest, GetMinPostingListToFitForThreeSmallHits) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Create and add 3 small hits that fit in the size range where we should be // checking for whether the PL has only 2 hits std::vector hits_in = CreateHits(/*num_hits=*/3, /*desired_byte_length=*/1); for (const Hit &hit : hits_in) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit)); } // ---------------------- // 23 delta(Hit #0) // 22 delta(Hit #1) // 21-18 Hit #2 // 17-12 // 11-6 kSpecialHit // 5-0 Offset=18 // ---------------------- int bytes_used = 6; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); std::deque hits_pushed(hits_in.rbegin(), hits_in.rend()); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); // Get the min size to fit for the hits in pl_used. Moving the hits in pl_used // into a posting list with this min size should make it ALMOST_FULL, which we // can see should have size = 14. This should not return the min posting list // size. // ---------------------- // 13 delta(Hit #0) // 12 delta(Hit #1) // 11-6 Hit #2 // 5-0 kSpecialHit // ---------------------- int expected_min_size = 14; EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Gt(serializer.GetMinPostingListSize())); EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(expected_min_size)); } TEST(PostingListHitSerializerTest, GetMinPostingListToFitForAlmostFullAndFullPLReturnsSameSize) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // Create and add some hits to make pl_used ALMOST_FULL std::vector hits_in = CreateHits(/*num_hits=*/7, /*desired_byte_length=*/2); for (const Hit &hit : hits_in) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit)); } // ---------------------- // 23-22 delta(Hit #0) // 21-20 delta(Hit #1) // 19-18 delta(Hit #2) // 17-16 delta(Hit #3) // 15-14 delta(Hit #4) // 13-12 delta(Hit #5) // 11-6 Hit #6 // 5-0 kSpecialHit // ---------------------- int bytes_used = 18; EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); std::deque hits_pushed(hits_in.rbegin(), hits_in.rend()); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); // GetMinPostingListSizeToFit should return the same size as pl_used. uint32_t min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used); EXPECT_THAT(min_size_to_fit, Eq(pl_size)); // Add another hit to make the posting list FULL Hit hit = CreateHit(hits_in.back(), /*desired_byte_length=*/1); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit)); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size)); hits_pushed.emplace_front(hit); EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAreArray(hits_pushed))); // GetMinPostingListSizeToFit should still be the same size as pl_used. min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used); EXPECT_THAT(min_size_to_fit, Eq(pl_size)); } TEST(PostingListHitSerializerTest, MoveFrom) { PostingListHitSerializer serializer; int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used1, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits1 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); for (const Hit &hit : hits1) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit)); } ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used2, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits2 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2); for (const Hit &hit : hits2) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit)); } ICING_ASSERT_OK(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1)); EXPECT_THAT(serializer.GetHits(&pl_used2), IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend()))); EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(IsEmpty())); } TEST(PostingListHitSerializerTest, MoveFromNullArgumentReturnsInvalidArgument) { PostingListHitSerializer serializer; int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used1, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); for (const Hit &hit : hits) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit)); } EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used1, /*src=*/nullptr), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(ElementsAreArray(hits.rbegin(), hits.rend()))); } TEST(PostingListHitSerializerTest, MoveFromInvalidPostingListReturnsInvalidArgument) { PostingListHitSerializer serializer; int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used1, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits1 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); for (const Hit &hit : hits1) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit)); } ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used2, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits2 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2); for (const Hit &hit : hits2) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit)); } // Write invalid hits to the beginning of pl_used1 to make it invalid. Hit invalid_hit(Hit::kInvalidValue); Hit *first_hit = reinterpret_cast(pl_used1.posting_list_buffer()); *first_hit = invalid_hit; ++first_hit; *first_hit = invalid_hit; EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(serializer.GetHits(&pl_used2), IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend()))); } TEST(PostingListHitSerializerTest, MoveToInvalidPostingListReturnsFailedPrecondition) { PostingListHitSerializer serializer; int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used1, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits1 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); for (const Hit &hit : hits1) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit)); } ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used2, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits2 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2); for (const Hit &hit : hits2) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit)); } // Write invalid hits to the beginning of pl_used2 to make it invalid. Hit invalid_hit(Hit::kInvalidValue); Hit *first_hit = reinterpret_cast(pl_used2.posting_list_buffer()); *first_hit = invalid_hit; ++first_hit; *first_hit = invalid_hit; EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend()))); } TEST(PostingListHitSerializerTest, MoveToPostingListTooSmall) { PostingListHitSerializer serializer; int pl_size = 3 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used1, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); std::vector hits1 = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1); for (const Hit &hit : hits1) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit)); } ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used2, PostingListUsed::CreateFromUnitializedRegion( &serializer, serializer.GetMinPostingListSize())); std::vector hits2 = CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2); for (const Hit &hit : hits2) { ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit)); } EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend()))); EXPECT_THAT(serializer.GetHits(&pl_used2), IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend()))); } TEST(PostingListHitSerializerTest, PopHitsWithTermFrequenciesAndFlags) { PostingListHitSerializer serializer; // Size = 24 int pl_size = 2 * serializer.GetMinPostingListSize(); ICING_ASSERT_OK_AND_ASSIGN( PostingListUsed pl_used, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size)); // This posting list is 24-bytes. Create four hits that will have deltas of // two bytes each and all of whom will have a non-default term-frequency. This // posting list will be almost_full. // // ---------------------- // 23 term-frequency(Hit #0) // 22 flags(Hit #0) // 21-20 delta(Hit #0) // 19 term-frequency(Hit #1) // 18 flags(Hit #1) // 17-16 delta(Hit #1) // 15 term-frequency(Hit #2) // 14 flags(Hit #2) // 13-12 delta(Hit #2) // 11-6 Hit #3 // 5-0 kInvalidHit // ---------------------- int bytes_used = 18; Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/5, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2); Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2); Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0)); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1)); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2)); ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3)); ICING_ASSERT_OK_AND_ASSIGN(std::vector hits_out, serializer.GetHits(&pl_used)); EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0)); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); // Now, pop the last hit. The posting list should contain the first three // hits. // // ---------------------- // 23 term-frequency(Hit #0) // 22 flags(Hit #0) // 21-20 delta(Hit #0) // 19 term-frequency(Hit #1) // 18 flags(Hit #1) // 17-16 delta(Hit #1) // 15-12 // 11-6 Hit #2 // 5-0 kInvalidHit // ---------------------- ICING_ASSERT_OK(serializer.PopFrontHits(&pl_used, 1)); ICING_ASSERT_OK_AND_ASSIGN(hits_out, serializer.GetHits(&pl_used)); EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0)); EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used)); } } // namespace } // namespace lib } // namespace icing