18 files changed, 6875 insertions, 667 deletions
diff --git a/icing/join/document-id-to-join-info.h b/icing/join/document-id-to-join-info.h
new file mode 100644
index 0000000..dee4885
--- /dev/null
+++ b/icing/join/document-id-to-join-info.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
+#define ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
+
+#include <utility>
+
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Pairs a DocumentId with the join info (JoinInfoType) recorded for it:
+// - QualifiedId join: the referenced document's namespace_id +
+//   fingerprint(uri).
+// - String join: the term id.
+// - Integer join: the integer.
+// Instances are stored in a posting list; the class is declared packed.
+// Ordering compares document_id first and join_info second.
+template <typename JoinInfoType>
+class DocumentIdToJoinInfo {
+ public:
+  // Builds an instance from kInvalidDocumentId and a default JoinInfoType.
+  static DocumentIdToJoinInfo GetInvalid() {
+    return DocumentIdToJoinInfo(kInvalidDocumentId, JoinInfoType());
+  }
+
+  explicit DocumentIdToJoinInfo(DocumentId document_id, JoinInfoType join_info)
+      : document_id_(document_id), join_info_(std::move(join_info)) {}
+
+  DocumentId document_id() const { return document_id_; }
+  const JoinInfoType& join_info() const { return join_info_; }
+
+  bool is_valid() const { return IsDocumentIdValid(document_id_); }
+
+  bool operator<(const DocumentIdToJoinInfo& other) const {
+    return document_id_ < other.document_id_ ||
+           (document_id_ == other.document_id_ &&
+            join_info_ < other.join_info_);
+  }
+
+  bool operator==(const DocumentIdToJoinInfo& other) const {
+    if (document_id_ != other.document_id_) return false;
+    return join_info_ == other.join_info_;
+  }
+
+ private:
+  DocumentId document_id_;
+  JoinInfoType join_info_;
+} __attribute__((packed));
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_JOIN_DOCUMENT_ID_TO_JOIN_INFO_H_
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
index e27b1ea..1b7ca0d 100644
--- a/icing/join/join-processor.cc
+++ b/icing/join/join-processor.cc
@@ -29,6 +29,7 @@
 #include "icing/join/aggregation-scorer.h"
 #include "icing/join/doc-join-info.h"
 #include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index.h"
 #include "icing/join/qualified-id.h"
 #include "icing/proto/schema.pb.h"
 #include "icing/proto/scoring.pb.h"
@@ -37,6 +38,7 @@
 #include "icing/scoring/scored-document-hit.h"
 #include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
 #include "icing/util/status-macros.h"
 
 namespace icing {
@@ -53,17 +55,121 @@ JoinProcessor::GetChildrenFetcher(
         "Parent property expression must be ", kQualifiedIdExpr));
   }
 
-  std::sort(
-      child_scored_document_hits.begin(), child_scored_document_hits.end(),
-      ScoredDocumentHitComparator(
-          /*is_descending=*/join_spec.nested_spec().scoring_spec().order_by() ==
-          ScoringSpecProto::Order::DESC));
-
-  // TODO(b/256022027):
-  // - Optimization
-  //   - Cache property to speed up property retrieval.
-  //   - If there is no cache, then we still have the flexibility to fetch it
-  //     from actual docs via DocumentStore.
+  ScoredDocumentHitComparator score_comparator(
+      /*is_descending=*/join_spec.nested_spec().scoring_spec().order_by() ==
+      ScoringSpecProto::Order::DESC);
+
+  if (qualified_id_join_index_->is_v2()) {
+    // v2
+    // Step 1a: sort child ScoredDocumentHits in document id descending order.
+    std::sort(child_scored_document_hits.begin(),
+              child_scored_document_hits.end(),
+              [](const ScoredDocumentHit& lhs, const ScoredDocumentHit& rhs) {
+                return lhs.document_id() > rhs.document_id();
+              });
+
+    // Step 1b: group all child ScoredDocumentHits by the document's
+    //          schema_type_id.
+    std::unordered_map<SchemaTypeId, std::vector<ScoredDocumentHit>>
+        schema_to_child_scored_doc_hits_map;
+    for (const ScoredDocumentHit& child_scored_document_hit :
+         child_scored_document_hits) {
+      std::optional<DocumentFilterData> child_doc_filter_data =
+          doc_store_->GetAliveDocumentFilterData(
+              child_scored_document_hit.document_id(), current_time_ms_);
+      if (!child_doc_filter_data) {
+        continue;
+      }
+
+      schema_to_child_scored_doc_hits_map[child_doc_filter_data
+                                              ->schema_type_id()]
+          .push_back(child_scored_document_hit);
+    }
+
+    // Step 1c: for each schema_type_id, lookup QualifiedIdJoinIndexImplV2 to
+    //          fetch all child join data from posting list(s). Convert all
+    //          child join data to referenced parent document ids and bucketize
+    //          child ScoredDocumentHits by it.
+    std::unordered_map<DocumentId, std::vector<ScoredDocumentHit>>
+        parent_to_child_docs_map;
+    for (auto& [schema_type_id, grouped_child_scored_doc_hits] :
+         schema_to_child_scored_doc_hits_map) {
+      // Get joinable_property_id of this schema.
+      ICING_ASSIGN_OR_RETURN(
+          const JoinablePropertyMetadata* metadata,
+          schema_store_->GetJoinablePropertyMetadata(
+              schema_type_id, join_spec.child_property_expression()));
+      if (metadata == nullptr ||
+          metadata->value_type != JoinableConfig::ValueType::QUALIFIED_ID) {
+        // Currently we only support qualified id, so skip other types.
+        continue;
+      }
+
+      // Lookup QualifiedIdJoinIndexImplV2.
+      ICING_ASSIGN_OR_RETURN(
+          std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase>
+              join_index_iter,
+          qualified_id_join_index_->GetIterator(
+              schema_type_id, /*joinable_property_id=*/metadata->id));
+
+      // - Join index contains all join data of schema_type_id and
+      //   join_index_iter will return all of them in (child) document id
+      //   descending order.
+      // - But we only need join data of child document ids which appear in
+      //   grouped_child_scored_doc_hits. Also grouped_child_scored_doc_hits
+      //   contain ScoredDocumentHits in (child) document id descending order.
+      // - Therefore, we advance 2 iterators to intersect them and get desired
+      //   join data.
+      auto child_scored_doc_hits_iter = grouped_child_scored_doc_hits.cbegin();
+      while (join_index_iter->Advance().ok() &&
+             child_scored_doc_hits_iter !=
+                 grouped_child_scored_doc_hits.cend()) {
+        // Advance child_scored_doc_hits_iter until it points to a
+        // ScoredDocumentHit with document id <= the one pointed by
+        // join_index_iter.
+        while (child_scored_doc_hits_iter !=
+                   grouped_child_scored_doc_hits.cend() &&
+               child_scored_doc_hits_iter->document_id() >
+                   join_index_iter->GetCurrent().document_id()) {
+          ++child_scored_doc_hits_iter;
+        }
+
+        if (child_scored_doc_hits_iter !=
+                grouped_child_scored_doc_hits.cend() &&
+            child_scored_doc_hits_iter->document_id() ==
+                join_index_iter->GetCurrent().document_id()) {
+          // We get a join data whose child document id exists in both join
+          // index and grouped_child_scored_doc_hits. Convert its join info to
+          // referenced parent document ids and bucketize ScoredDocumentHits by
+          // it (putting into parent_to_child_docs_map).
+          const NamespaceFingerprintIdentifier& ref_ns_id =
+              join_index_iter->GetCurrent().join_info();
+          libtextclassifier3::StatusOr<DocumentId> ref_parent_doc_id_or =
+              doc_store_->GetDocumentId(ref_ns_id);
+          if (ref_parent_doc_id_or.ok()) {
+            parent_to_child_docs_map[std::move(ref_parent_doc_id_or)
+                                         .ValueOrDie()]
+                .push_back(*child_scored_doc_hits_iter);
+          }
+        }
+      }
+    }
+
+    // Step 1d: finally, sort each parent's joined child ScoredDocumentHits by
+    //          score.
+    for (auto& [parent_doc_id, bucketized_child_scored_hits] :
+         parent_to_child_docs_map) {
+      std::sort(bucketized_child_scored_hits.begin(),
+                bucketized_child_scored_hits.end(), score_comparator);
+    }
+
+    return JoinChildrenFetcher(join_spec, std::move(parent_to_child_docs_map));
+  }
+
+  // v1
+  // TODO(b/275121148): deprecate this part after rolling out v2.
+  std::sort(child_scored_document_hits.begin(),
+            child_scored_document_hits.end(), score_comparator);
 
   // Step 1: group child documents by parent documentId. Currently we only
   //         support QualifiedId joining, so fetch the qualified id content of
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index f503442..a40d934 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -22,9 +22,13 @@
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "icing/absl_ports/canonical_errors.h"
 #include "icing/document-builder.h"
 #include "icing/file/filesystem.h"
 #include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/join-children-fetcher.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
 #include "icing/join/qualified-id-join-index.h"
 #include "icing/join/qualified-id-join-indexing-handler.h"
 #include "icing/portable/platform.h"
@@ -58,6 +62,9 @@ namespace {
 using ::testing::ElementsAre;
 using ::testing::IsTrue;
 
+// TODO(b/275121148): remove template after deprecating
+// QualifiedIdJoinIndexImplV1.
+template <typename T>
 class JoinProcessorTest : public ::testing::Test {
  protected:
   void SetUp() override {
@@ -108,6 +115,25 @@ class JoinProcessorTest : public ::testing::Test {
                                      .SetDataTypeJoinableString(
                                          JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                      .SetCardinality(CARDINALITY_OPTIONAL)))
+            .AddType(
+                SchemaTypeConfigBuilder()
+                    .SetType("Message")
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName("content")
+                                     .SetDataTypeString(TERM_MATCH_EXACT,
+                                                        TOKENIZER_PLAIN)
+                                     .SetCardinality(CARDINALITY_OPTIONAL))
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName("sender")
+                                     .SetDataTypeJoinableString(
+                                         JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+                                     .SetCardinality(CARDINALITY_OPTIONAL))
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName("receiver")
+                                     .SetDataTypeJoinableString(
+                                         JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+                                     .SetCardinality(CARDINALITY_OPTIONAL)))
+
             .Build();
     ASSERT_THAT(schema_store_->SetSchema(
                     schema, /*ignore_errors_and_delete_documents=*/false,
@@ -121,18 +147,15 @@ class JoinProcessorTest : public ::testing::Test {
         DocumentStore::Create(
             &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
             /*force_recovery_and_revalidate_documents=*/false,
-            /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+            /*namespace_id_fingerprint=*/true, /*pre_mapping_fbv=*/false,
             /*use_persistent_hash_map=*/false,
             PortableFileBackedProtoLog<
                 DocumentWrapper>::kDeflateCompressionLevel,
             /*initialize_stats=*/nullptr));
     doc_store_ = std::move(create_result.document_store);
 
-    ICING_ASSERT_OK_AND_ASSIGN(
-        qualified_id_join_index_,
-        QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
-                                     /*pre_mapping_fbv=*/false,
-                                     /*use_persistent_hash_map=*/false));
+    ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+                               CreateQualifiedIdJoinIndex<T>());
   }
 
   void TearDown() override {
@@ -143,6 +166,28 @@ class JoinProcessorTest : public ::testing::Test {
     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
   }
 
+  template <typename UnknownJoinIndexType>  // Fallback for unsupported types.
+  libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+  CreateQualifiedIdJoinIndex() {
+    return absl_ports::InvalidArgumentError("Unknown type");
+  }
+  // V1: created with pre_mapping_fbv and use_persistent_hash_map both false.
+  template <>
+  libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+  CreateQualifiedIdJoinIndex<QualifiedIdJoinIndexImplV1>() {
+    return QualifiedIdJoinIndexImplV1::Create(
+        filesystem_, qualified_id_join_index_dir_, /*pre_mapping_fbv=*/false,
+        /*use_persistent_hash_map=*/false);
+  }
+  // V2: created in the same directory with pre_mapping_fbv false.
+  template <>
+  libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
+  CreateQualifiedIdJoinIndex<QualifiedIdJoinIndexImplV2>() {
+    return QualifiedIdJoinIndexImplV2::Create(filesystem_,
+                                              qualified_id_join_index_dir_,
+                                              /*pre_mapping_fbv=*/false);
+  }
+
   libtextclassifier3::StatusOr<DocumentId> PutAndIndexDocument(
       const DocumentProto& document) {
     ICING_ASSIGN_OR_RETURN(DocumentId document_id, doc_store_->Put(document));
@@ -153,7 +198,7 @@ class JoinProcessorTest : public ::testing::Test {
 
     ICING_ASSIGN_OR_RETURN(
         std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-        QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+        QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                                qualified_id_join_index_.get()));
     ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
                                           /*recovery_mode=*/false,
@@ -163,8 +208,8 @@ class JoinProcessorTest : public ::testing::Test {
 
   libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>> Join(
       const JoinSpecProto& join_spec,
-      std::vector<ScoredDocumentHit>&& parent_scored_document_hits,
-      std::vector<ScoredDocumentHit>&& child_scored_document_hits) {
+      std::vector<ScoredDocumentHit> parent_scored_document_hits,
+      std::vector<ScoredDocumentHit> child_scored_document_hits) {
     JoinProcessor join_processor(
         doc_store_.get(), schema_store_.get(), qualified_id_join_index_.get(),
         /*current_time_ms=*/fake_clock_.GetSystemTimeMilliseconds());
@@ -191,7 +236,11 @@ class JoinProcessorTest : public ::testing::Test {
   FakeClock fake_clock_;
 };
 
-TEST_F(JoinProcessorTest, JoinByQualifiedId) {
+using TestTypes =
+    ::testing::Types<QualifiedIdJoinIndexImplV1, QualifiedIdJoinIndexImplV2>;
+TYPED_TEST_SUITE(JoinProcessorTest, TestTypes);
+
+TYPED_TEST(JoinProcessorTest, JoinByQualifiedId_allDocuments) {
   DocumentProto person1 = DocumentBuilder()
                               .SetKey("pkg$db/namespace", "person1")
                               .SetSchema("Person")
@@ -227,15 +276,15 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
           .Build();
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
-                             PutAndIndexDocument(person1));
+                             this->PutAndIndexDocument(person1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
-                             PutAndIndexDocument(person2));
+                             this->PutAndIndexDocument(person2));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
-                             PutAndIndexDocument(email1));
+                             this->PutAndIndexDocument(email1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
-                             PutAndIndexDocument(email2));
+                             this->PutAndIndexDocument(email2));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
-                             PutAndIndexDocument(email3));
+                             this->PutAndIndexDocument(email3));
 
   ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
                                     /*score=*/0.0);
@@ -267,8 +316,8 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
-      Join(join_spec, std::move(parent_scored_document_hits),
-           std::move(child_scored_document_hits)));
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
   EXPECT_THAT(
       joined_result_document_hits,
       ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
@@ -282,7 +331,112 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
                       {scored_doc_hit5, scored_doc_hit3}))));
 }
 
-TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
+TYPED_TEST(JoinProcessorTest, JoinByQualifiedId_partialDocuments) {
+  DocumentProto person1 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person1")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Alice")
+                              .Build();
+  DocumentProto person2 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person2")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Bob")
+                              .Build();
+  DocumentProto person3 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person3")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Eve")
+                              .Build();
+  // Emails: "sender" names a person as "pkg$db/namespace#person<N>".
+  DocumentProto email1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 1")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  DocumentProto email2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 2")
+          .AddStringProperty("sender", "pkg$db/namespace#person2")
+          .Build();
+  DocumentProto email3 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email3")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 3")
+          .AddStringProperty("sender", "pkg$db/namespace#person3")
+          .Build();
+  DocumentProto email4 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email4")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 4")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  // Index all 7 documents; ids of documents left out of the join are dropped.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             this->PutAndIndexDocument(person1));
+  ICING_ASSERT_OK(/*document_id2 unused*/
+                  this->PutAndIndexDocument(person2));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             this->PutAndIndexDocument(person3));
+  ICING_ASSERT_OK(/*document_id4 unused*/
+                  this->PutAndIndexDocument(email1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+                             this->PutAndIndexDocument(email2));
+  ICING_ASSERT_OK(/*document_id6 unused*/
+                  this->PutAndIndexDocument(email3));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id7,
+                             this->PutAndIndexDocument(email4));
+  // Build hits only for the documents that take part in the join.
+  ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+                                    /*score=*/4.0);
+  ScoredDocumentHit scored_doc_hit7(document_id7, kSectionIdMaskNone,
+                                    /*score=*/5.0);
+
+  // Only join person1, person3, email2 and email4.
+  // Parent ScoredDocumentHits: person1, person3
+  std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+      scored_doc_hit3, scored_doc_hit1};
+
+  // Child ScoredDocumentHits: email2, email4
+  std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit7,
+                                                               scored_doc_hit5};
+  // Join on the children's "sender"; COUNT aggregation, nested order DESC.
+  JoinSpecProto join_spec;
+  join_spec.set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  join_spec.set_child_property_expression("sender");
+  join_spec.set_aggregation_scoring_strategy(
+      JoinSpecProto::AggregationScoringStrategy::COUNT);
+  join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+      ScoringSpecProto::Order::DESC);
+  // Expect person3 with no children, then person1 with only email4 (hit7).
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
+  EXPECT_THAT(
+      joined_result_document_hits,
+      ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/0.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit3,
+                      /*child_scored_document_hits=*/{})),
+                  EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/1.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit1,
+                      /*child_scored_document_hits=*/{scored_doc_hit7}))));
+}
+
+TYPED_TEST(JoinProcessorTest,
+           ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
   DocumentProto person1 = DocumentBuilder()
                               .SetKey("pkg$db/namespace", "person1")
                               .SetSchema("Person")
@@ -303,11 +457,11 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
                              .Build();
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
-                             PutAndIndexDocument(person1));
+                             this->PutAndIndexDocument(person1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
-                             PutAndIndexDocument(email1));
+                             this->PutAndIndexDocument(email1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
-                             PutAndIndexDocument(email2));
+                             this->PutAndIndexDocument(email2));
 
   ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
                                     /*score=*/0.0);
@@ -335,8 +489,8 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
-      Join(join_spec, std::move(parent_scored_document_hits),
-           std::move(child_scored_document_hits)));
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
   // Since Email2 doesn't have "sender" property, it should be ignored.
   EXPECT_THAT(
       joined_result_document_hits,
@@ -345,7 +499,8 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
           /*child_scored_document_hits=*/{scored_doc_hit2}))));
 }
 
-TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
+TYPED_TEST(JoinProcessorTest,
+           ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
   DocumentProto person1 = DocumentBuilder()
                               .SetKey("pkg$db/namespace", "person1")
                               .SetSchema("Person")
@@ -379,13 +534,13 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
           .Build();
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
-                             PutAndIndexDocument(person1));
+                             this->PutAndIndexDocument(person1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
-                             PutAndIndexDocument(email1));
+                             this->PutAndIndexDocument(email1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
-                             PutAndIndexDocument(email2));
+                             this->PutAndIndexDocument(email2));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
-                             PutAndIndexDocument(email3));
+                             this->PutAndIndexDocument(email3));
 
   ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
                                     /*score=*/0.0);
@@ -415,8 +570,8 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
-      Join(join_spec, std::move(parent_scored_document_hits),
-           std::move(child_scored_document_hits)));
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
   // Email 2 and email 3 (document id 3 and 4) contain invalid qualified ids.
   // Join processor should ignore them.
   EXPECT_THAT(joined_result_document_hits,
@@ -426,7 +581,7 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
                   /*child_scored_document_hits=*/{scored_doc_hit2}))));
 }
 
-TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
+TYPED_TEST(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
   DocumentProto person1 = DocumentBuilder()
                               .SetKey("pkg$db/namespace", "person1")
                               .SetSchema("Person")
@@ -448,11 +603,11 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
           .Build();
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
-                             PutAndIndexDocument(person1));
+                             this->PutAndIndexDocument(person1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
-                             PutAndIndexDocument(person2));
+                             this->PutAndIndexDocument(person2));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
-                             PutAndIndexDocument(email1));
+                             this->PutAndIndexDocument(email1));
 
   ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
                                     /*score=*/0.0);
@@ -479,8 +634,8 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
-      Join(join_spec, std::move(parent_scored_document_hits),
-           std::move(child_scored_document_hits)));
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
   // Person1 has no child documents, but left join should also include it.
   EXPECT_THAT(
       joined_result_document_hits,
@@ -494,7 +649,7 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
                       /*child_scored_document_hits=*/{}))));
 }
 
-TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
+TYPED_TEST(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
   DocumentProto person1 = DocumentBuilder()
                               .SetKey("pkg$db/namespace", "person1")
                               .SetSchema("Person")
@@ -524,13 +679,13 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
           .Build();
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
-                             PutAndIndexDocument(person1));
+                             this->PutAndIndexDocument(person1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
-                             PutAndIndexDocument(email1));
+                             this->PutAndIndexDocument(email1));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
-                             PutAndIndexDocument(email2));
+                             this->PutAndIndexDocument(email2));
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
-                             PutAndIndexDocument(email3));
+                             this->PutAndIndexDocument(email3));
 
   ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
                                     /*score=*/0.0);
@@ -560,8 +715,8 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
-      Join(join_spec, std::move(parent_scored_document_hits),
-           std::move(child_scored_document_hits)));
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
   // Child documents should be sorted according to the (nested) ranking
   // strategy.
   EXPECT_THAT(
@@ -572,7 +727,7 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
           {scored_doc_hit3, scored_doc_hit4, scored_doc_hit2}))));
 }
 
-TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
+TYPED_TEST(JoinProcessorTest, ShouldAllowSelfJoining) {
   DocumentProto email1 =
       DocumentBuilder()
           .SetKey("pkg$db/namespace", "email1")
@@ -582,7 +737,7 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
           .Build();
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
-                             PutAndIndexDocument(email1));
+                             this->PutAndIndexDocument(email1));
 
   ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
                                     /*score=*/0.0);
@@ -605,8 +760,8 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
-      Join(join_spec, std::move(parent_scored_document_hits),
-           std::move(child_scored_document_hits)));
+      this->Join(join_spec, std::move(parent_scored_document_hits),
+                 std::move(child_scored_document_hits)));
   EXPECT_THAT(joined_result_document_hits,
               ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
                   /*final_score=*/1.0,
@@ -614,6 +769,156 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
                   /*child_scored_document_hits=*/{scored_doc_hit1}))));
 }
 
+TYPED_TEST(JoinProcessorTest, MultipleChildSchemasJoining) {
+  DocumentProto person1 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person1")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Alice")
+                              .Build();
+  DocumentProto person2 = DocumentBuilder()
+                              .SetKey("pkg$db/namespace", "person2")
+                              .SetSchema("Person")
+                              .AddStringProperty("Name", "Bob")
+                              .Build();
+
+  DocumentProto email1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 1")
+          .AddStringProperty("sender", "pkg$db/namespace#person2")
+          .Build();
+  DocumentProto email2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 2")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  DocumentProto email3 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "email3")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 3")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .Build();
+  DocumentProto message1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "message1")
+          .SetSchema("Message")
+          .AddStringProperty("content", "test content 1")
+          .AddStringProperty("sender", "pkg$db/namespace#person1")
+          .AddStringProperty("receiver", "pkg$db/namespace#person2")
+          .Build();
+  DocumentProto message2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "message2")
+          .SetSchema("Message")
+          .AddStringProperty("content", "test content 2")
+          .AddStringProperty("sender", "pkg$db/namespace#person2")
+          .AddStringProperty("receiver", "pkg$db/namespace#person1")
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             this->PutAndIndexDocument(person1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             this->PutAndIndexDocument(person2));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             this->PutAndIndexDocument(email1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+                             this->PutAndIndexDocument(email2));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+                             this->PutAndIndexDocument(email3));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+                             this->PutAndIndexDocument(message1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id7,
+                             this->PutAndIndexDocument(message2));
+
+  ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
+                                    /*score=*/0.0);
+  ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
+                                    /*score=*/5.0);
+  ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
+                                    /*score=*/3.0);
+  ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
+                                    /*score=*/2.0);
+  ScoredDocumentHit scored_doc_hit6(document_id6, kSectionIdMaskNone,
+                                    /*score=*/4.0);
+  ScoredDocumentHit scored_doc_hit7(document_id7, kSectionIdMaskNone,
+                                    /*score=*/1.0);
+
+  // Parent ScoredDocumentHits: all Person documents
+  std::vector<ScoredDocumentHit> parent_scored_document_hits = {
+      scored_doc_hit1, scored_doc_hit2};
+
+  // Child ScoredDocumentHits: all Email and Message documents
+  std::vector<ScoredDocumentHit> child_scored_document_hits = {
+      scored_doc_hit3, scored_doc_hit4, scored_doc_hit5, scored_doc_hit6,
+      scored_doc_hit7};
+
+  // Join by "sender".
+  // - Person1: [
+  //     email2 (scored_doc_hit4),
+  //     email3 (scored_doc_hit5),
+  //     message1 (scored_doc_hit6),
+  //   ]
+  // - Person2: [
+  //     email1 (scored_doc_hit3),
+  //     message2 (scored_doc_hit7),
+  //   ]
+  JoinSpecProto join_spec;
+  join_spec.set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  join_spec.set_child_property_expression("sender");
+  join_spec.set_aggregation_scoring_strategy(
+      JoinSpecProto::AggregationScoringStrategy::COUNT);
+  join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
+      ScoringSpecProto::Order::DESC);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<JoinedScoredDocumentHit> joined_result_document_hits1,
+      this->Join(join_spec, parent_scored_document_hits,
+                 child_scored_document_hits));
+  EXPECT_THAT(
+      joined_result_document_hits1,
+      ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/3.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit1,
+                      /*child_scored_document_hits=*/
+                      {scored_doc_hit6, scored_doc_hit4, scored_doc_hit5})),
+                  EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/2.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit2,
+                      /*child_scored_document_hits=*/
+                      {scored_doc_hit3, scored_doc_hit7}))));
+
+  // Join by "receiver".
+  // - Person1: [
+  //     message2 (scored_doc_hit7),
+  //   ]
+  // - Person2: [
+  //     message1 (scored_doc_hit6),
+  //   ]
+  join_spec.set_child_property_expression("receiver");
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<JoinedScoredDocumentHit> joined_result_document_hits2,
+      this->Join(join_spec, parent_scored_document_hits,
+                 child_scored_document_hits));
+  EXPECT_THAT(
+      joined_result_document_hits2,
+      ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/1.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit1,
+                      /*child_scored_document_hits=*/{scored_doc_hit7})),
+                  EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
+                      /*final_score=*/1.0,
+                      /*parent_scored_document_hit=*/scored_doc_hit2,
+                      /*child_scored_document_hits=*/{scored_doc_hit6}))));
+}
+
 // TODO(b/256022027): add unit tests for non-joinable property. If joinable
 //                    value type is unset, then qualifed id join should not
 //                    include the child document even if it contains a valid
diff --git a/icing/join/posting-list-join-data-accessor.h b/icing/join/posting-list-join-data-accessor.h
new file mode 100644
index 0000000..6669f9f
--- /dev/null
+++ b/icing/join/posting-list-join-data-accessor.h
@@ -0,0 +1,211 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
+#define ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// This class is used to provide a simple abstraction for adding join data to
+// posting lists. PostingListJoinDataAccessor handles:
+// 1) selection of properly-sized posting lists for the accumulated join index
+//    data during Finalize()
+// 2) chaining of max-sized posting lists.
+template <typename JoinDataType>
+class PostingListJoinDataAccessor : public PostingListAccessor {
+ public:
+  // Creates an empty PostingListJoinDataAccessor. serializer is not owned and
+  // must outlive the returned accessor.
+  // RETURNS:
+  //   - On success, a valid instance of PostingListJoinDataAccessor
+  //   - INVALID_ARGUMENT error if storage has an invalid block_size.
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+  Create(FlashIndexStorage* storage,
+         PostingListJoinDataSerializer<JoinDataType>* serializer);
+
+  // Creates a PostingListJoinDataAccessor with an existing posting list
+  // identified by existing_posting_list_id.
+  //
+  // RETURNS:
+  //   - On success, a valid instance of PostingListJoinDataAccessor
+  //   - INVALID_ARGUMENT on invalid block_size, or any GetPostingList error.
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+  CreateFromExisting(FlashIndexStorage* storage,
+                     PostingListJoinDataSerializer<JoinDataType>* serializer,
+                     PostingListIdentifier existing_posting_list_id);
+
+  PostingListSerializer* GetSerializer() override { return serializer_; }
+
+  // Retrieves the next batch of data in the posting list chain.
+  //
+  // RETURNS:
+  //   - On success, a vector of join data in the posting list chain (empty
+  //     once the end of the chain has already been reached)
+  //   - FAILED_PRECONDITION_ERROR if the instance was created via Create.
+  //   - INTERNAL_ERROR if unable to read the next posting list in the chain or
+  //     if the posting list has been corrupted somehow.
+  libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetNextDataBatch();
+
+  // Prepends one data. This may result in flushing the posting list to disk (if
+  // the PostingListJoinDataAccessor holds a max-sized posting list that is
+  // full) or freeing a pre-existing posting list if it is too small to fit all
+  // data necessary.
+  //
+  // RETURNS:
+  //   - OK, on success
+  //   - INVALID_ARGUMENT if !data.is_valid() or if data is not in increasing
+  //     order relative to the previously added data.
+  //   - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
+  //     posting list.
+  libtextclassifier3::Status PrependData(const JoinDataType& data);
+
+ private:
+  explicit PostingListJoinDataAccessor(
+      FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
+      PostingListJoinDataSerializer<JoinDataType>* serializer)
+      : PostingListAccessor(storage, std::move(in_memory_posting_list)),
+        serializer_(serializer) {}
+
+  PostingListJoinDataSerializer<JoinDataType>* serializer_;  // Does not own.
+};
+
+template <typename JoinDataType>
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+PostingListJoinDataAccessor<JoinDataType>::Create(
+    FlashIndexStorage* storage,
+    PostingListJoinDataSerializer<JoinDataType>* serializer) {
+  uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
+      storage->block_size(), serializer->GetDataTypeBytes());
+  ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
+                         PostingListUsed::CreateFromUnitializedRegion(
+                             serializer, max_posting_list_bytes));
+  return std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>(
+      new PostingListJoinDataAccessor<JoinDataType>(
+          storage, std::move(in_memory_posting_list), serializer));
+}
+
+template <typename JoinDataType>
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
+PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+    FlashIndexStorage* storage,
+    PostingListJoinDataSerializer<JoinDataType>* serializer,
+    PostingListIdentifier existing_posting_list_id) {
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+      Create(storage, serializer));
+  ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+                         storage->GetPostingList(existing_posting_list_id));
+  pl_accessor->preexisting_posting_list_ =
+      std::make_unique<PostingListHolder>(std::move(holder));
+  return pl_accessor;
+}
+
+// Returns the data of the current posting list, then advances down the chain.
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<std::vector<JoinDataType>>
+PostingListJoinDataAccessor<JoinDataType>::GetNextDataBatch() {
+  if (preexisting_posting_list_ == nullptr) {
+    if (has_reached_posting_list_chain_end_) {
+      return std::vector<JoinDataType>();
+    }
+    return absl_ports::FailedPreconditionError(
+        "Cannot retrieve data from a PostingListJoinDataAccessor that was not "
+        "created from a preexisting posting list.");
+  }
+  ICING_ASSIGN_OR_RETURN(
+      std::vector<JoinDataType> batch,
+      serializer_->GetData(&preexisting_posting_list_->posting_list));
+  uint32_t next_block_index = kInvalidBlockIndex;
+  // Posting lists will only be chained when they are max-sized, in which case
+  // next_block_index will point to the next block for the next posting list.
+  // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
+  // to the next free list block, which is not relevant here.
+  if (preexisting_posting_list_->posting_list.size_in_bytes() ==
+      storage_->max_posting_list_bytes()) {
+    next_block_index = preexisting_posting_list_->next_block_index;
+  }
+
+  if (next_block_index != kInvalidBlockIndex) {
+    // Since we only have to deal with next block for max-sized posting list
+    // block, max_num_posting_lists is 1 and posting_list_index_bits is
+    // BitsToStore(1).
+    PostingListIdentifier next_posting_list_id(
+        next_block_index, /*posting_list_index=*/0,
+        /*posting_list_index_bits=*/BitsToStore(1));
+    ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
+                           storage_->GetPostingList(next_posting_list_id));
+    preexisting_posting_list_ =
+        std::make_unique<PostingListHolder>(std::move(holder));
+  } else {
+    has_reached_posting_list_chain_end_ = true;
+    preexisting_posting_list_.reset();
+  }
+  return batch;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataAccessor<JoinDataType>::PrependData(
+    const JoinDataType& data) {
+  PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
+                                   ? preexisting_posting_list_->posting_list
+                                   : in_memory_posting_list_;
+  libtextclassifier3::Status status =
+      serializer_->PrependData(&active_pl, data);
+  if (!absl_ports::IsResourceExhausted(status)) {
+    return status;
+  }
+  // There is no more room to add data to this current posting list! Therefore,
+  // we need to either move those data to a larger posting list or flush this
+  // posting list and create another max-sized posting list in the chain.
+  if (preexisting_posting_list_ != nullptr) {
+    ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
+  } else {
+    ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
+  }
+
+  // Re-add data. Should always fit since we just cleared
+  // in_memory_posting_list_. It's fine to explicitly reference
+  // in_memory_posting_list_ here because there's no way of reaching this line
+  // while preexisting_posting_list_ is still in use.
+  return serializer_->PrependData(&in_memory_posting_list_, data);
+}
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
diff --git a/icing/join/posting-list-join-data-accessor_test.cc b/icing/join/posting-list-join-data-accessor_test.cc
new file mode 100644
index 0000000..ddc2d32
--- /dev/null
+++ b/icing/join/posting-list-join-data-accessor_test.cc
@@ -0,0 +1,435 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/posting-list-join-data-accessor.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+using JoinDataType = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+static constexpr NamespaceId kDefaultNamespaceId = 1;
+
+class PostingListJoinDataAccessorTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    test_dir_ = GetTestTempDir() + "/test_dir";
+    file_name_ = test_dir_ + "/test_file.idx.index";
+
+    ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+    ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
+
+    serializer_ =
+        std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FlashIndexStorage flash_index_storage,
+        FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+    flash_index_storage_ =
+        std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+  }
+
+  void TearDown() override {
+    flash_index_storage_.reset();
+    serializer_.reset();
+    ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+  }
+
+  Filesystem filesystem_;
+  std::string test_dir_;
+  std::string file_name_;
+  std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>> serializer_;
+  std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+};
+
+std::vector<JoinDataType> CreateData(int num_data, DocumentId start_document_id,
+                                     NamespaceId ref_namespace_id,
+                                     uint64_t start_ref_hash_uri) {
+  std::vector<JoinDataType> data;
+  data.reserve(num_data);
+  for (int i = 0; i < num_data; ++i) {
+    data.push_back(JoinDataType(
+        start_document_id,
+        NamespaceFingerprintIdentifier(ref_namespace_id,
+                                       /*fingerprint=*/start_ref_hash_uri)));
+
+    ++start_document_id;
+    ++start_ref_hash_uri;
+  }
+  return data;
+}
+
+TEST_F(PostingListJoinDataAccessorTest, DataAddAndRetrieveProperly) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // Add some join data
+  std::vector<JoinDataType> data_vec =
+      CreateData(/*num_data=*/5, /*start_document_id=*/0,
+                 /*ref_namespace_id=*/kDefaultNamespaceId,
+                 /*start_ref_hash_uri=*/819);
+  for (const JoinDataType& data : data_vec) {
+    EXPECT_THAT(pl_accessor->PrependData(data), IsOk());
+  }
+  PostingListAccessor::FinalizeResult result =
+      std::move(*pl_accessor).Finalize();
+  EXPECT_THAT(result.status, IsOk());
+  EXPECT_THAT(result.id.block_index(), Eq(1));
+  EXPECT_THAT(result.id.posting_list_index(), Eq(0));
+
+  // Retrieve some data.
+  ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+                             flash_index_storage_->GetPostingList(result.id));
+  EXPECT_THAT(
+      serializer_->GetData(&pl_holder.posting_list),
+      IsOkAndHolds(ElementsAreArray(data_vec.rbegin(), data_vec.rend())));
+  EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, PreexistingPLKeepOnSameBlock) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // Add a single data. This will fit in a min-sized posting list.
+  JoinDataType data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/123));
+  ICING_ASSERT_OK(pl_accessor->PrependData(data1));
+  PostingListAccessor::FinalizeResult result1 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result1.status);
+  // Should be allocated to the first block.
+  ASSERT_THAT(result1.id.block_index(), Eq(1));
+  ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+  // Add one more data. The minimum size for a posting list must be able to fit
+  // two data, so this should NOT cause the previous pl to be reallocated.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+          flash_index_storage_.get(), serializer_.get(), result1.id));
+  JoinDataType data2(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/456));
+  ICING_ASSERT_OK(pl_accessor->PrependData(data2));
+  PostingListAccessor::FinalizeResult result2 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result2.status);
+  // Should be in the same posting list.
+  EXPECT_THAT(result2.id, Eq(result1.id));
+
+  // The posting list at result2.id should hold all of the data that have been
+  // added.
+  ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+                             flash_index_storage_->GetPostingList(result2.id));
+  EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+              IsOkAndHolds(ElementsAre(data2, data1)));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, PreexistingPLReallocateToLargerPL) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // Adding 3 data should cause Finalize to allocate a 56-byte posting list,
+  // which can store at most 4 data.
+  std::vector<JoinDataType> data_vec1 =
+      CreateData(/*num_data=*/3, /*start_document_id=*/0,
+                 /*ref_namespace_id=*/kDefaultNamespaceId,
+                 /*start_ref_hash_uri=*/819);
+  for (const JoinDataType& data : data_vec1) {
+    ICING_ASSERT_OK(pl_accessor->PrependData(data));
+  }
+  PostingListAccessor::FinalizeResult result1 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result1.status);
+  // Should be allocated to the first block.
+  ASSERT_THAT(result1.id.block_index(), Eq(1));
+  ASSERT_THAT(result1.id.posting_list_index(), Eq(0));
+
+  // Now add more data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+          flash_index_storage_.get(), serializer_.get(), result1.id));
+  // The current posting list can fit 1 more data. Adding 12 more data should
+  // result in these data being moved to a larger posting list. Also the total
+  // size of these data won't exceed max size posting list, so there will be
+  // only one single posting list and no chain.
+  std::vector<JoinDataType> data_vec2 = CreateData(
+      /*num_data=*/12, /*start_document_id=*/data_vec1.back().document_id() + 1,
+      /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+
+  for (const JoinDataType& data : data_vec2) {
+    ICING_ASSERT_OK(pl_accessor->PrependData(data));
+  }
+  PostingListAccessor::FinalizeResult result2 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result2.status);
+  // Should be allocated to the second (new) block because the posting list
+  // should grow beyond the size that the first block maintains.
+  EXPECT_THAT(result2.id.block_index(), Eq(2));
+  EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+
+  // The posting list at result2.id should hold all of the data that have been
+  // added.
+  std::vector<JoinDataType> all_data_vec;
+  all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+  all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+  all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+  ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
+                             flash_index_storage_->GetPostingList(result2.id));
+  EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+              IsOkAndHolds(ElementsAreArray(all_data_vec.rbegin(),
+                                            all_data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest, MultiBlockChainsBlocksProperly) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(JoinDataType)
+  // is 14, so the max size posting list can store (4096 - 12) / 14 = 291 data.
+  // Adding 292 data should cause:
+  // - 2 max size posting lists being allocated to block 1 and block 2.
+  // - Chaining: block 2 -> block 1
+  std::vector<JoinDataType> data_vec = CreateData(
+      /*num_data=*/292, /*start_document_id=*/0,
+      /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+  for (const JoinDataType& data : data_vec) {
+    ICING_ASSERT_OK(pl_accessor->PrependData(data));
+  }
+  PostingListAccessor::FinalizeResult result1 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result1.status);
+  PostingListIdentifier second_block_id = result1.id;
+  // Should be allocated to the second block.
+  EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
+                                   /*block_index=*/2, /*posting_list_index=*/0,
+                                   /*posting_list_index_bits=*/0)));
+
+  // We should be able to retrieve all data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListHolder pl_holder,
+      flash_index_storage_->GetPostingList(second_block_id));
+  // This pl_holder will only hold a posting list with the data that didn't fit
+  // on the first block.
+  ICING_ASSERT_OK_AND_ASSIGN(std::vector<JoinDataType> second_block_data,
+                             serializer_->GetData(&pl_holder.posting_list));
+  ASSERT_THAT(second_block_data, SizeIs(Lt(data_vec.size())));
+  auto first_block_data_start = data_vec.rbegin() + second_block_data.size();
+  EXPECT_THAT(second_block_data,
+              ElementsAreArray(data_vec.rbegin(), first_block_data_start));
+
+  // Now retrieve all of the data that were on the first block.
+  uint32_t first_block_id = pl_holder.next_block_index;
+  EXPECT_THAT(first_block_id, Eq(1));
+
+  PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+                              /*posting_list_index_bits=*/0);
+  ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+                             flash_index_storage_->GetPostingList(pl_id));
+  EXPECT_THAT(
+      serializer_->GetData(&pl_holder.posting_list),
+      IsOkAndHolds(ElementsAreArray(first_block_data_start, data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+       PreexistingMultiBlockReusesBlocksProperly) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(JoinDataType)
+  // is 14, so the max size posting list can store (4096 - 12) / 14 = 291 data.
+  // Adding 292 data will cause:
+  // - 2 max size posting lists being allocated to block 1 and block 2.
+  // - Chaining: block 2 -> block 1
+  std::vector<JoinDataType> data_vec1 = CreateData(
+      /*num_data=*/292, /*start_document_id=*/0,
+      /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+  for (const JoinDataType& data : data_vec1) {
+    ICING_ASSERT_OK(pl_accessor->PrependData(data));
+  }
+  PostingListAccessor::FinalizeResult result1 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result1.status);
+  PostingListIdentifier first_add_id = result1.id;
+  EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
+                                /*block_index=*/2, /*posting_list_index=*/0,
+                                /*posting_list_index_bits=*/0)));
+
+  // Now add more data. These should fit on the existing second block and not
+  // fill it up.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      pl_accessor,
+      PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+          flash_index_storage_.get(), serializer_.get(), first_add_id));
+  std::vector<JoinDataType> data_vec2 = CreateData(
+      /*num_data=*/10, /*start_document_id=*/data_vec1.back().document_id() + 1,
+      /*ref_namespace_id=*/kDefaultNamespaceId, /*start_ref_hash_uri=*/819);
+  for (const JoinDataType& data : data_vec2) {
+    ICING_ASSERT_OK(pl_accessor->PrependData(data));
+  }
+  PostingListAccessor::FinalizeResult result2 =
+      std::move(*pl_accessor).Finalize();
+  ICING_ASSERT_OK(result2.status);
+  PostingListIdentifier second_add_id = result2.id;
+  EXPECT_THAT(second_add_id, Eq(first_add_id));
+
+  // We should be able to retrieve all data.
+  std::vector<JoinDataType> all_data_vec;
+  all_data_vec.reserve(data_vec1.size() + data_vec2.size());
+  all_data_vec.insert(all_data_vec.end(), data_vec1.begin(), data_vec1.end());
+  all_data_vec.insert(all_data_vec.end(), data_vec2.begin(), data_vec2.end());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListHolder pl_holder,
+      flash_index_storage_->GetPostingList(second_add_id));
+  // This pl_holder will only hold a posting list with the data that didn't fit
+  // on the first block.
+  ICING_ASSERT_OK_AND_ASSIGN(std::vector<JoinDataType> second_block_data,
+                             serializer_->GetData(&pl_holder.posting_list));
+  ASSERT_THAT(second_block_data, SizeIs(Lt(all_data_vec.size())));
+  auto first_block_data_start =
+      all_data_vec.rbegin() + second_block_data.size();
+  EXPECT_THAT(second_block_data,
+              ElementsAreArray(all_data_vec.rbegin(), first_block_data_start));
+
+  // Now retrieve all of the data that were on the first block.
+  uint32_t first_block_id = pl_holder.next_block_index;
+  EXPECT_THAT(first_block_id, Eq(1));
+
+  PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
+                              /*posting_list_index_bits=*/0);
+  ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
+                             flash_index_storage_->GetPostingList(pl_id));
+  EXPECT_THAT(serializer_->GetData(&pl_holder.posting_list),
+              IsOkAndHolds(ElementsAreArray(first_block_data_start,
+                                            all_data_vec.rend())));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+       InvalidDataShouldReturnInvalidArgument) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // JoinDataType::GetInvalid() is the invalid sentinel; it must be rejected.
+  EXPECT_THAT(accessor->PrependData(JoinDataType::GetInvalid()),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+       JoinDataNonIncreasingShouldReturnInvalidArgument) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  JoinDataType baseline(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/819));
+  ICING_ASSERT_OK(accessor->PrependData(baseline));
+  // Same document id and namespace id as baseline, smaller fingerprint.
+  JoinDataType smaller_fingerprint(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/818));
+  EXPECT_THAT(accessor->PrependData(smaller_fingerprint),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  // Same document id, smaller namespace id, larger fingerprint.
+  JoinDataType smaller_namespace(
+      /*document_id=*/1, NamespaceFingerprintIdentifier(
+                             kDefaultNamespaceId - 1, /*fingerprint=*/820));
+  EXPECT_THAT(accessor->PrependData(smaller_namespace),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  // Smaller document id, larger namespace id and fingerprint.
+  JoinDataType smaller_document_id(
+      /*document_id=*/0, NamespaceFingerprintIdentifier(
+                             kDefaultNamespaceId + 1, /*fingerprint=*/820));
+  EXPECT_THAT(accessor->PrependData(smaller_document_id),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+       NewPostingListNoDataAddedShouldReturnInvalidArgument) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> accessor,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  // Nothing is prepended before finalizing, so the status of the
+  // FinalizeResult is expected to be INVALID_ARGUMENT.
+  EXPECT_THAT(std::move(*accessor).Finalize().status,
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PostingListJoinDataAccessorTest,
+       PreexistingPostingListNoDataAddedShouldSucceed) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> writer,
+      PostingListJoinDataAccessor<JoinDataType>::Create(
+          flash_index_storage_.get(), serializer_.get()));
+  JoinDataType seed_data(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(kDefaultNamespaceId, /*fingerprint=*/819));
+  ICING_ASSERT_OK(writer->PrependData(seed_data));
+  PostingListAccessor::FinalizeResult written =
+      std::move(*writer).Finalize();
+  ICING_ASSERT_OK(written.status);
+  // Reopen the finalized posting list and finalize again with no new data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> reopened,
+      PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+          flash_index_storage_.get(), serializer_.get(), written.id));
+  PostingListAccessor::FinalizeResult refinalized =
+      std::move(*reopened).Finalize();
+  EXPECT_THAT(refinalized.status, IsOk());
+}
+
+}  // namespace
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/join/posting-list-join-data-serializer.h b/icing/join/posting-list-join-data-serializer.h
new file mode 100644
index 0000000..9f39dca
--- /dev/null
+++ b/icing/join/posting-list-join-data-serializer.h
@@ -0,0 +1,803 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
+#define ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+// A serializer class to serialize JoinDataType to PostingListUsed. Usually
+// JoinDataType is DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>,
+// DocumentIdToJoinInfo<TermId>, or DocumentIdToJoinInfo<int64_t>.
+//
+// REQUIRES:
+// - JoinDataType is comparable by operator <.
+// - JoinDataType implements is_valid() method.
+// - JoinDataType has static method GetInvalid() that returns a JoinDataType
+//   instance containing invalid data.
+template <typename JoinDataType>
+class PostingListJoinDataSerializer : public PostingListSerializer {
+ public:
+  using SpecialDataType = SpecialData<JoinDataType>;
+  static_assert(sizeof(SpecialDataType) == sizeof(JoinDataType), "");
+
+  static constexpr uint32_t kSpecialDataSize =
+      kNumSpecialData * sizeof(SpecialDataType);
+
+  uint32_t GetDataTypeBytes() const override { return sizeof(JoinDataType); }
+
+  uint32_t GetMinPostingListSize() const override {
+    static constexpr uint32_t kMinPostingListSize = kSpecialDataSize;
+    static_assert(sizeof(PostingListIndex) <= kMinPostingListSize,
+                  "PostingListIndex must be small enough to fit in a "
+                  "minimum-sized Posting List.");
+
+    return kMinPostingListSize;
+  }
+
+  uint32_t GetMinPostingListSizeToFit(
+      const PostingListUsed* posting_list_used) const override;
+
+  uint32_t GetBytesUsed(
+      const PostingListUsed* posting_list_used) const override;
+
+  void Clear(PostingListUsed* posting_list_used) const override;
+
+  libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
+                                      PostingListUsed* src) const override;
+
+  // Prepend a JoinData to the posting list.
+  //
+  // RETURNS:
+  //   - INVALID_ARGUMENT if !data.is_valid() or if data is smaller than the
+  //     most recently added data.
+  //   - FAILED_PRECONDITION if the posting list is in an invalid state.
+  //   - RESOURCE_EXHAUSTED if there is no more room in the posting list.
+  libtextclassifier3::Status PrependData(PostingListUsed* posting_list_used,
+                                         const JoinDataType& data) const;
+
+  // Prepend multiple JoinData to the posting list.
+  // Data should be sorted in ascending order (as defined by the less than
+  // operator for JoinData).
+  // If keep_prepended is true, whatever could be prepended is kept, otherwise
+  // the posting list is reverted and left in its original state.
+  //
+  // RETURNS:
+  //   The number of data that have been prepended to the posting list. It is
+  //   0 if the list is invalid, or if keep_prepended is false and reverted.
+  libtextclassifier3::StatusOr<uint32_t> PrependDataArray(
+      PostingListUsed* posting_list_used, const JoinDataType* array,
+      uint32_t num_data, bool keep_prepended) const;
+
+  // Retrieves all data stored in the posting list.
+  //
+  // RETURNS:
+  //   - On success, a vector of JoinDataType sorted by the reverse order of
+  //     prepending.
+  //   - INTERNAL_ERROR if the posting list has been corrupted somehow.
+  libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetData(
+      const PostingListUsed* posting_list_used) const;
+
+  // Same as GetData but appends data to data_arr_out.
+  //
+  // RETURNS:
+  //   - OK on success, and data_arr_out will be appended JoinDataType sorted by
+  //     the reverse order of prepending.
+  //   - INTERNAL_ERROR if the posting list has been corrupted somehow.
+  libtextclassifier3::Status GetData(
+      const PostingListUsed* posting_list_used,
+      std::vector<JoinDataType>* data_arr_out) const;
+
+  // Undo the last num_data data prepended. If num_data > number of data, then
+  // we clear all data.
+  //
+  // RETURNS:
+  //   - OK on success
+  //   - INTERNAL_ERROR if the posting list has been corrupted somehow.
+  libtextclassifier3::Status PopFrontData(PostingListUsed* posting_list_used,
+                                          uint32_t num_data) const;
+
+  // Helper function to determine if posting list is full.
+  bool IsFull(const PostingListUsed* posting_list_used) const {
+    return GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+           GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+  }
+
+ private:
+  // In PostingListJoinDataSerializer, there is no compression, but we still use
+  // the traditional posting list implementation.
+  //
+  // Posting list layout formats:
+  //
+  // NOT_FULL
+  // +-special-data-0--+-special-data-1--+------------+-----------------------+
+  // |                 |                 |            |                       |
+  // |data-start-offset|  Data::Invalid  | 0x00000000 |   (compressed) data   |
+  // |                 |                 |            |                       |
+  // +-----------------+-----------------+------------+-----------------------+
+  //
+  // ALMOST_FULL
+  // +-special-data-0--+-special-data-1--+-----+------------------------------+
+  // |                 |                 |     |                              |
+  // |  Data::Invalid  |    1st data     |(pad)|      (compressed) data       |
+  // |                 |                 |     |                              |
+  // +-----------------+-----------------+-----+------------------------------+
+  //
+  // FULL
+  // +-special-data-0--+-special-data-1--+-----+------------------------------+
+  // |                 |                 |     |                              |
+  // |    1st data     |    2nd data     |(pad)|      (compressed) data       |
+  // |                 |                 |     |                              |
+  // +-----------------+-----------------+-----+------------------------------+
+  //
+  // The first two uncompressed (special) data also implicitly encode
+  // information about the size of the compressed data region.
+  //
+  // 1. If the posting list is NOT_FULL, then special_data_0 contains the byte
+  //    offset of the start of the compressed data. Thus, the size of the
+  //    compressed data is
+  //    posting_list_used->size_in_bytes() - special_data_0.data_start_offset().
+  //
+  // 2. If posting list is ALMOST_FULL or FULL, then the compressed data region
+  //    starts somewhere between
+  //    [kSpecialDataSize, kSpecialDataSize + sizeof(JoinDataType) - 1] and ends
+  //    at posting_list_used->size_in_bytes() - 1.
+  //
+  // EXAMPLE
+  // JoinDataType = DocumentIdToJoinInfo<int64_t>. Posting list size: 48 bytes
+  //
+  // EMPTY!
+  // +-- byte 0-11 --+---- 12-23 ----+------------ 24-47 -------------+
+  // |               |               |                                |
+  // |      48       | Data::Invalid |           0x00000000           |
+  // |               |               |                                |
+  // +---------------+---------------+--------------------------------+
+  //
+  // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 12, JoinInteger = 5)
+  // NOT FULL!
+  // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+  // |               |               |               | 12            |
+  // |      36       | Data::Invalid |  0x00000000   |  5            |
+  // |               |               |               |               |
+  // +---------------+---------------+---------------+---------------+
+  //
+  // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 18, JoinInteger = -2)
+  // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+  // |               |               | 18            | 12            |
+  // |      24       | Data::Invalid | -2            |  5            |
+  // |               |               |               |               |
+  // +---------------+---------------+---------------+---------------+
+  //
+  // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 22, JoinInteger = 3)
+  // ALMOST_FULL!
+  // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+  // |               | 22            | 18            | 12            |
+  // | Data::Invalid |  3            | -2            |  5            |
+  // |               |               |               |               |
+  // +---------------+---------------+---------------+---------------+
+  //
+  // Add DocumentIdToJoinInfo<int64_t>(DocumentId = 27, JoinInteger = 0)
+  // FULL!
+  // +-- byte 0-11 --+---- 12-23 ----+---- 24-35 ----+---- 36-47 ----+
+  // | 27            | 22            | 18            | 12            |
+  // |  0            |  3            | -2            |  5            |
+  // |               |               |               |               |
+  // +---------------+---------------+---------------+---------------+
+
+  // Helpers to determine what state the posting list is in.
+  bool IsAlmostFull(const PostingListUsed* posting_list_used) const {
+    return !GetSpecialData(posting_list_used, /*index=*/0).data().is_valid() &&
+           GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+  }
+
+  bool IsEmpty(const PostingListUsed* posting_list_used) const {
+    return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset() ==
+               posting_list_used->size_in_bytes() &&
+           !GetSpecialData(posting_list_used, /*index=*/1).data().is_valid();
+  }
+
+  // Returns false if both special data are invalid or if data start offset
+  // stored in the special data is less than kSpecialDataSize or greater than
+  // posting_list_used->size_in_bytes(). Returns true, otherwise.
+  bool IsPostingListValid(const PostingListUsed* posting_list_used) const;
+
+  // Prepend data to a posting list that is in the ALMOST_FULL state.
+  //
+  // RETURNS:
+  //  - OK, if successful
+  //  - INVALID_ARGUMENT if data is smaller than the most recently added data.
+  libtextclassifier3::Status PrependDataToAlmostFull(
+      PostingListUsed* posting_list_used, const JoinDataType& data) const;
+
+  // Prepend data to a posting list that is in the EMPTY state. This will always
+  // succeed because there are no pre-existing data and no validly constructed
+  // posting list could fail to fit one data.
+  void PrependDataToEmpty(PostingListUsed* posting_list_used,
+                          const JoinDataType& data) const;
+
+  // Prepend data to a posting list that is in the NOT_FULL state.
+  //
+  // RETURNS:
+  //  - OK, if successful
+  //  - INVALID_ARGUMENT if data is smaller than the most recently added data.
+  libtextclassifier3::Status PrependDataToNotFull(
+      PostingListUsed* posting_list_used, const JoinDataType& data,
+      uint32_t offset) const;
+
+  // Returns either 0 (FULL state), sizeof(JoinDataType) (ALMOST_FULL state) or
+  // a byte offset between kSpecialDataSize and
+  // posting_list_used->size_in_bytes() (inclusive) (NOT_FULL state).
+  uint32_t GetStartByteOffset(const PostingListUsed* posting_list_used) const;
+
+  // Sets special data 0 to properly reflect what start byte offset is (see
+  // layout comment for further details).
+  //
+  // Returns false if offset > posting_list_used->size_in_bytes() or offset is
+  // in range (sizeof(JoinDataType), kSpecialDataSize) or
+  // (0, sizeof(JoinDataType)). True, otherwise.
+  bool SetStartByteOffset(PostingListUsed* posting_list_used,
+                          uint32_t offset) const;
+
+  // Helper for MoveFrom/GetData/PopFrontData. Adds limit number of data to out
+  // or all data in the posting list if the posting list contains less than
+  // limit number of data. out can be NULL.
+  //
+  // NOTE: If called with limit=1, pop=true on a posting list that transitioned
+  // from NOT_FULL directly to FULL, GetDataInternal will not return the posting
+  // list to NOT_FULL. Instead it will leave it in a valid state, but it will be
+  // ALMOST_FULL.
+  //
+  // RETURNS:
+  //   - OK on success
+  //   - INTERNAL_ERROR if the posting list has been corrupted somehow.
+  libtextclassifier3::Status GetDataInternal(
+      const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+      std::vector<JoinDataType>* out) const;
+
+  // Retrieves the value stored in the index-th special data.
+  //
+  // REQUIRES:
+  //   0 <= index < kNumSpecialData.
+  //
+  // RETURNS:
+  //   - A valid SpecialData<JoinDataType>.
+  SpecialDataType GetSpecialData(const PostingListUsed* posting_list_used,
+                                 uint32_t index) const;
+
+  // Sets the value stored in the index-th special data to special_data.
+  //
+  // REQUIRES:
+  //   0 <= index < kNumSpecialData.
+  void SetSpecialData(PostingListUsed* posting_list_used, uint32_t index,
+                      const SpecialDataType& special_data) const;
+
+  // Prepends data to the memory region
+  // [offset - sizeof(JoinDataType), offset - 1] and
+  // returns the new beginning of the region.
+  //
+  // RETURNS:
+  //   - The new beginning of the padded region, if successful.
+  //   - INVALID_ARGUMENT if data will not fit (uncompressed) between
+  //       [kSpecialDataSize, offset - 1]
+  libtextclassifier3::StatusOr<uint32_t> PrependDataUncompressed(
+      PostingListUsed* posting_list_used, const JoinDataType& data,
+      uint32_t offset) const;
+};
+
+template <typename JoinDataType>
+uint32_t PostingListJoinDataSerializer<JoinDataType>::GetBytesUsed(
+    const PostingListUsed* posting_list_used) const {
+  // Everything from the start byte offset to the end of the buffer holds
+  // actual data. A special slot counts only when it stores an actual data
+  // entry, not the data start offset or the invalid data sentinel.
+  const uint32_t total_bytes = posting_list_used->size_in_bytes();
+  return total_bytes - GetStartByteOffset(posting_list_used);
+}
+
+template <typename JoinDataType>
+uint32_t
+PostingListJoinDataSerializer<JoinDataType>::GetMinPostingListSizeToFit(
+    const PostingListUsed* posting_list_used) const {
+  const bool data_in_special_slot =
+      IsFull(posting_list_used) || IsAlmostFull(posting_list_used);
+  if (!data_in_special_slot) {
+    // NOT_FULL: GetBytesUsed() counts no special data. The smallest posting
+    // list guaranteed to hold the same data is an ALMOST_FULL one, with the
+    // invalid sentinel in special data 0 and actual data from special data 1
+    // onwards. That layout needs the bytes currently in use plus one extra
+    // JoinDataType-sized slot.
+    return GetBytesUsed(posting_list_used) + GetDataTypeBytes();
+  }
+
+  // FULL or ALMOST_FULL: a special slot already holds data, which means
+  // this posting list *is* the minimum size posting list that can fit its
+  // data. Its own size is the answer.
+  return posting_list_used->size_in_bytes();
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::Clear(
+    PostingListUsed* posting_list_used) const {
+  // Reset the start byte offset to the buffer size (the EMPTY layout). The
+  // result is ignored on purpose: the buffer size is a valid argument.
+  const uint32_t end_offset = posting_list_used->size_in_bytes();
+  SetStartByteOffset(posting_list_used, /*offset=*/end_offset);
+}
+
+// Moves every data entry from src into dst, then clears src. RETURNS OK on
+// success; INVALID_ARGUMENT if src's data cannot fit in dst or src is
+// invalid; FAILED_PRECONDITION if dst is invalid; ABORTED if data cannot be
+// popped from src. Null dst/src is rejected by ICING_RETURN_ERROR_IF_NULL.
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::MoveFrom(
+    PostingListUsed* dst, PostingListUsed* src) const {
+  ICING_RETURN_ERROR_IF_NULL(dst);
+  ICING_RETURN_ERROR_IF_NULL(src);
+  // Both sizes are held as uint32_t so that they match the %u specifiers.
+  const uint32_t min_size_to_fit = GetMinPostingListSizeToFit(src);
+  const uint32_t dst_size = dst->size_in_bytes();
+  if (min_size_to_fit > dst_size) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "src MinPostingListSizeToFit %u must not be larger than dst size %u.",
+        min_size_to_fit, dst_size));
+  }
+  if (!IsPostingListValid(dst)) {
+    return absl_ports::FailedPreconditionError(
+        "Dst posting list is in an invalid state and can't be used!");
+  }
+  if (!IsPostingListValid(src)) {
+    return absl_ports::InvalidArgumentError(
+        "Cannot MoveFrom an invalid src posting list!");
+  }
+
+  // Pop data from src until none of it sits in the special slots and the
+  // rest fits in dst's regular data region. That rest can then be memcpy'd.
+  std::vector<JoinDataType> data_arr;
+  while (IsFull(src) || IsAlmostFull(src) ||
+         (dst->size_in_bytes() - kSpecialDataSize < GetBytesUsed(src))) {
+    if (!GetDataInternal(src, /*limit=*/1, /*pop=*/true, &data_arr).ok()) {
+      return absl_ports::AbortedError(
+          "Unable to retrieve data from src posting list.");
+    }
+  }
+
+  // memcpy what is left in src to the tail of dst and record its start. The
+  // loop above ensures GetBytesUsed(src) <= dst->size_in_bytes() -
+  // kSpecialDataSize, so the offset is valid for NOT_FULL; result ignored.
+  Clear(dst);
+  memcpy(dst->posting_list_buffer() + dst->size_in_bytes() - GetBytesUsed(src),
+         src->posting_list_buffer() + GetStartByteOffset(src),
+         GetBytesUsed(src));
+  SetStartByteOffset(dst, dst->size_in_bytes() - GetBytesUsed(src));
+
+  // Put the popped data back in reverse pop order. PrependData rejects
+  // invalid data and data smaller than the previous one; RESOURCE_EXHAUSTED
+  // is not expected because enough room was ensured above.
+  for (auto riter = data_arr.rbegin(); riter != data_arr.rend(); ++riter) {
+    ICING_RETURN_IF_ERROR(PrependData(dst, *riter));
+  }
+  Clear(src);
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependDataToAlmostFull(
+    PostingListUsed* posting_list_used, const JoinDataType& data) const {
+  // In the ALMOST_FULL state, special data 1 holds the most recent data.
+  SpecialDataType most_recent =
+      GetSpecialData(posting_list_used, /*index=*/1);
+  if (data < most_recent.data()) {
+    return absl_ports::InvalidArgumentError(
+        "JoinData being prepended must not be smaller than the most recent "
+        "JoinData");
+  }
+  // Data is stored uncompressed, so the only free slot left is special data
+  // 0. Writing data there turns the posting list into the FULL state.
+  SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(data));
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::PrependDataToEmpty(
+    PostingListUsed* posting_list_used, const JoinDataType& data) const {
+  // The posting list is empty, so data is stored verbatim as its first data.
+  const uint32_t list_size = posting_list_used->size_in_bytes();
+  if (list_size != kSpecialDataSize) {
+    // A regular data region exists beyond the special slots. Because the
+    // list is empty, size != kSpecialDataSize and
+    // size % sizeof(JoinDataType) == 0, that region has room for data and
+    // ValueOrDie is safe.
+    const uint32_t new_start =
+        PrependDataUncompressed(posting_list_used, data, /*offset=*/list_size)
+            .ValueOrDie();
+    // PrependDataUncompressed returned a valid offset, so the return value
+    // of SetStartByteOffset is ignored.
+    SetStartByteOffset(posting_list_used, new_start);
+    return;
+  }
+  // Minimum-sized posting list: there are only the special slots, and the
+  // first data goes into special data 1. sizeof(JoinDataType) is a valid
+  // start offset, so the return value of SetStartByteOffset is ignored.
+  SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+  SetStartByteOffset(posting_list_used, /*offset=*/sizeof(JoinDataType));
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependDataToNotFull(
+    PostingListUsed* posting_list_used, const JoinDataType& data,
+    uint32_t offset) const {
+  // Read the most recent data, which is stored at offset in the buffer.
+  JoinDataType most_recent = JoinDataType::GetInvalid();
+  memcpy(&most_recent, posting_list_used->posting_list_buffer() + offset,
+         sizeof(JoinDataType));
+  if (data < most_recent) {
+    return absl_ports::InvalidArgumentError(
+        "JoinData being prepended must not be smaller than the most recent "
+        "JoinData");
+  }
+  if (offset < kSpecialDataSize + sizeof(JoinDataType)) {
+    // No free slot is left before offset, so data goes into special data 1
+    // and the start offset becomes sizeof(JoinDataType), i.e. ALMOST_FULL.
+    SetSpecialData(posting_list_used, /*index=*/1, SpecialDataType(data));
+    SetStartByteOffset(posting_list_used, /*offset=*/sizeof(JoinDataType));
+    return libtextclassifier3::Status::OK;
+  }
+  // A free slot remains right before offset in the regular data region.
+  const uint32_t new_offset =
+      PrependDataUncompressed(posting_list_used, data, offset).ValueOrDie();
+  SetStartByteOffset(posting_list_used, new_offset);
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PrependData(
+    PostingListUsed* posting_list_used, const JoinDataType& data) const {
+  if (!data.is_valid()) {
+    return absl_ports::InvalidArgumentError("Cannot prepend an invalid data!");
+  }
+  if (!IsPostingListValid(posting_list_used)) {
+    return absl_ports::FailedPreconditionError(
+        "This PostingListUsed is in an invalid state and can't add any data!");
+  }
+  // Dispatch on state: FULL, ALMOST_FULL, EMPTY; anything else is NOT_FULL.
+  if (IsFull(posting_list_used)) {
+    return absl_ports::ResourceExhaustedError("No more room for data");
+  }
+  if (IsAlmostFull(posting_list_used)) {
+    return PrependDataToAlmostFull(posting_list_used, data);
+  }
+  if (IsEmpty(posting_list_used)) {
+    PrependDataToEmpty(posting_list_used, data);
+    return libtextclassifier3::Status::OK;
+  }
+  return PrependDataToNotFull(posting_list_used, data,
+                              GetStartByteOffset(posting_list_used));
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListJoinDataSerializer<JoinDataType>::PrependDataArray(
+    PostingListUsed* posting_list_used, const JoinDataType* array,
+    uint32_t num_data, bool keep_prepended) const {
+  if (!IsPostingListValid(posting_list_used)) {
+    return 0;
+  }
+
+  // Prepend in array order and stop at the first data that is rejected.
+  uint32_t num_prepended = 0;
+  while (num_prepended < num_data &&
+         PrependData(posting_list_used, array[num_prepended]).ok()) {
+    ++num_prepended;
+  }
+  if (num_prepended == num_data || keep_prepended) {
+    return num_prepended;
+  }
+
+  // Only part of the array was prepended and the caller does not want to
+  // keep it: pop the num_prepended data added above and report 0.
+  ICING_RETURN_IF_ERROR(
+      PopFrontData(posting_list_used, /*num_data=*/num_prepended));
+  return 0;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<std::vector<JoinDataType>>
+PostingListJoinDataSerializer<JoinDataType>::GetData(
+    const PostingListUsed* posting_list_used) const {
+  std::vector<JoinDataType> collected;  // Filled by the appending overload.
+  ICING_RETURN_IF_ERROR(GetData(posting_list_used, &collected));
+  return collected;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status PostingListJoinDataSerializer<JoinDataType>::GetData(
+    const PostingListUsed* posting_list_used,
+    std::vector<JoinDataType>* data_arr_out) const {
+  // Fetch every data (limit is UINT32_MAX) without popping anything.
+  constexpr uint32_t kAll = std::numeric_limits<uint32_t>::max();
+  return GetDataInternal(posting_list_used, kAll, /*pop=*/false, data_arr_out);
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::PopFrontData(
+    PostingListUsed* posting_list_used, uint32_t num_data) const {
+  if (num_data == 1 && IsFull(posting_list_used)) {
+    // The PL is in FULL state which means that we save 2 uncompressed data in
+    // the 2 special positions. But FULL state may be reached by 2 different
+    // states.
+    // (1) In ALMOST_FULL state
+    // +------------------+-----------------+-----+---------------------------+
+    // |Data::Invalid     |1st data         |(pad)|(compressed) data          |
+    // |                  |                 |     |                           |
+    // +------------------+-----------------+-----+---------------------------+
+    // When we prepend another data, we can only put it at special data 0, and
+    // thus get a FULL PL
+    // +------------------+-----------------+-----+---------------------------+
+    // |new 1st data      |original 1st data|(pad)|(compressed) data          |
+    // |                  |                 |     |                           |
+    // +------------------+-----------------+-----+---------------------------+
+    //
+    // (2) In NOT_FULL state
+    // +------------------+-----------------+-------+---------+---------------+
+    // |data-start-offset |Data::Invalid    |(pad)  |1st data |(compressed)   |
+    // |                  |                 |       |         |data           |
+    // +------------------+-----------------+-------+---------+---------------+
+    // When we prepend another data, we can reach any of the 3 following
+    // scenarios:
+    // (2.1) NOT_FULL
+    // if the space of pad and original 1st data can accommodate the new 1st
+    // data and the encoded delta value.
+    // +------------------+-----------------+-----+--------+------------------+
+    // |data-start-offset |Data::Invalid    |(pad)|new     |(compressed) data |
+    // |                  |                 |     |1st data|                  |
+    // +------------------+-----------------+-----+--------+------------------+
+    // (2.2) ALMOST_FULL
+    // If the space of pad and original 1st data cannot accommodate the new 1st
+    // data and the encoded delta value but can accommodate the encoded delta
+    // value only. We can put the new 1st data at special position 1.
+    // +------------------+-----------------+---------+-----------------------+
+    // |Data::Invalid     |new 1st data     |(pad)    |(compressed) data      |
+    // |                  |                 |         |                       |
+    // +------------------+-----------------+---------+-----------------------+
+    // (2.3) FULL
+    // In very rare case, it cannot even accommodate only the encoded delta
+    // value. we can move the original 1st data into special position 1 and the
+    // new 1st data into special position 0. This may happen because we use
+    // VarInt encoding method which may make the encoded value longer (about
+    // 4/3 times of original)
+    // +------------------+-----------------+--------------+------------------+
+    // |new 1st data      |original 1st data|(pad)         |(compressed) data |
+    // |                  |                 |              |                  |
+    // +------------------+-----------------+--------------+------------------+
+    //
+    // Suppose now the PL is in FULL state. But we don't know whether it arrived
+    // this state from NOT_FULL (like (2.3)) or from ALMOST_FULL (like (1)).
+    // We'll return to ALMOST_FULL state like (1) if we simply pop the new 1st
+    // data, but we want to make the prepending operation "reversible". So
+    // there should be some way to return to NOT_FULL if possible. A simple way
+    // to do is:
+    // - Pop 2 data out of the PL to state ALMOST_FULL or NOT_FULL.
+    // - Add the second data ("original 1st data") back.
+    //
+    // Then we can return to the correct original states of (2.1) or (1). This
+    // makes our prepending operation reversible.
+    std::vector<JoinDataType> out;
+
+    // Popping 2 data should never fail because we've just ensured that the
+    // posting list is in the FULL state.
+    ICING_RETURN_IF_ERROR(
+        GetDataInternal(posting_list_used, /*limit=*/2, /*pop=*/true, &out));
+
+    // PrependData should never fail because:
+    // - out[1] is a valid data less than all previous data in the posting list.
+    // - There's no way that the posting list could run out of room because it
+    //   previously stored these 2 data.
+    ICING_RETURN_IF_ERROR(PrependData(posting_list_used, out[1]));
+  } else if (num_data > 0) {
+    return GetDataInternal(posting_list_used, /*limit=*/num_data, /*pop=*/true,
+                           /*out=*/nullptr);
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::Status
+PostingListJoinDataSerializer<JoinDataType>::GetDataInternal(
+    const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
+    std::vector<JoinDataType>* out) const {
+  // Visits at most `limit` data from the current start byte offset, appending
+  // each to `out` if non-null. If `pop` is set, visited data are then removed.
+  const auto pl_size = posting_list_used->size_in_bytes();
+  uint32_t cursor = GetStartByteOffset(posting_list_used);
+  uint32_t num_visited = 0;
+
+  // Phase 1: the special positions. cursor < kSpecialDataSize guarantees that
+  // cursor / sizeof(JoinDataType) < kNumSpecialData, i.e. the index is valid.
+  for (; num_visited < limit && cursor < kSpecialDataSize;
+       ++num_visited, cursor += sizeof(JoinDataType)) {
+    SpecialDataType special_data = GetSpecialData(
+        posting_list_used, /*index=*/cursor / sizeof(JoinDataType));
+    if (out != nullptr) {
+      out->push_back(special_data.data());
+    }
+  }
+
+  // Phase 2: the regular data region.
+  // - The data are stored uncompressed.
+  // - The posting list size is a multiple of the data type size.
+  // Hence, in the ALMOST_FULL or FULL state, the first non-special data is
+  // guaranteed to sit at kSpecialDataSize. Padding-skipping logic must not be
+  // applied here: with uncompressed data, 0 bytes are meaningful (e.g.
+  // inverted doc id byte = 0).
+  for (; num_visited < limit && cursor < pl_size;
+       ++num_visited, cursor += sizeof(JoinDataType)) {
+    JoinDataType data = JoinDataType::GetInvalid();
+    memcpy(&data, posting_list_used->posting_list_buffer() + cursor,
+           sizeof(JoinDataType));
+    if (out != nullptr) {
+      out->push_back(data);
+    }
+  }
+
+  if (!pop) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Pop all data traversed: zero the consumed region, then advance the start.
+  PostingListUsed* mutable_posting_list_used =
+      const_cast<PostingListUsed*>(posting_list_used);
+  if (cursor >= kSpecialDataSize && cursor < pl_size) {
+    memset(mutable_posting_list_used->posting_list_buffer() + kSpecialDataSize,
+           0, cursor - kSpecialDataSize);
+  }
+  SetStartByteOffset(mutable_posting_list_used, cursor);
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename JoinDataType>
+typename PostingListJoinDataSerializer<JoinDataType>::SpecialDataType
+PostingListJoinDataSerializer<JoinDataType>::GetSpecialData(
+    const PostingListUsed* posting_list_used, uint32_t index) const {
+  // Special data are laid out back to back at the front of the buffer, so the
+  // index-th one lives at byte offset index * sizeof(SpecialDataType).
+  const auto* slot = posting_list_used->posting_list_buffer() +
+                     index * sizeof(SpecialDataType);
+  // The placeholder offset 0 is harmless: memcpy overwrites the whole object.
+  SpecialDataType loaded(0);
+  memcpy(&loaded, slot, sizeof(SpecialDataType));
+  return loaded;
+}
+
+template <typename JoinDataType>
+void PostingListJoinDataSerializer<JoinDataType>::SetSpecialData(
+    PostingListUsed* posting_list_used, uint32_t index,
+    const SpecialDataType& special_data) const {
+  const auto slot_offset = index * sizeof(SpecialDataType);  // In bytes.
+  memcpy(posting_list_used->posting_list_buffer() + slot_offset, &special_data,
+         sizeof(SpecialDataType));
+}
+
+template <typename JoinDataType>
+bool PostingListJoinDataSerializer<JoinDataType>::IsPostingListValid(
+    const PostingListUsed* posting_list_used) const {
+  if (IsAlmostFull(posting_list_used)) {  // Special data 1 must hold data.
+    if (GetSpecialData(posting_list_used, /*index=*/1).data().is_valid()) {
+      return true;
+    }
+    ICING_LOG(ERROR)
+        << "Both special data cannot be invalid at the same time.";
+    return false;
+  }
+  if (IsFull(posting_list_used)) return true;  // FULL: nothing to validate.
+  // NOT_FULL: special position 0 must hold an in-range data start offset.
+  const auto start_offset =
+      GetSpecialData(posting_list_used, /*index=*/0).data_start_offset();
+  if (start_offset >= kSpecialDataSize &&
+      start_offset <= posting_list_used->size_in_bytes()) {
+    return true;
+  }
+  ICING_LOG(ERROR) << "Offset: " << start_offset
+                   << " size: " << posting_list_used->size_in_bytes()
+                   << " sp size: " << kSpecialDataSize;
+  return false;
+}
+
+template <typename JoinDataType>
+uint32_t PostingListJoinDataSerializer<JoinDataType>::GetStartByteOffset(
+    const PostingListUsed* posting_list_used) const {
+  // Maps the posting list state to the byte offset of its first data.
+  // FULL: data start at special position 0.
+  if (IsFull(posting_list_used)) return 0;
+  // ALMOST_FULL: data start at special position 1.
+  if (IsAlmostFull(posting_list_used)) return sizeof(JoinDataType);
+  // NOT_FULL: special position 0 stores the start offset explicitly.
+  return GetSpecialData(posting_list_used, /*index=*/0).data_start_offset();
+}
+
+template <typename JoinDataType>
+bool PostingListJoinDataSerializer<JoinDataType>::SetStartByteOffset(
+    PostingListUsed* posting_list_used, uint32_t offset) const {
+  // Representable offsets: 0 (FULL), sizeof(JoinDataType) (ALMOST_FULL) and
+  // [kSpecialDataSize, size_in_bytes()] (NOT_FULL). Reject everything else.
+  if (posting_list_used->size_in_bytes() < offset) {
+    ICING_LOG(ERROR) << "offset cannot be a value greater than size "
+                     << posting_list_used->size_in_bytes() << ". offset is "
+                     << offset << ".";
+    return false;
+  }
+  if (sizeof(JoinDataType) < offset && offset < kSpecialDataSize) {
+    ICING_LOG(ERROR) << "offset cannot be a value between ("
+                     << sizeof(JoinDataType) << ", " << kSpecialDataSize
+                     << "). offset is " << offset << ".";
+    return false;
+  }
+  if (offset != 0 && offset < sizeof(JoinDataType)) {
+    ICING_LOG(ERROR) << "offset cannot be a value between (0, "
+                     << sizeof(JoinDataType) << "). offset is " << offset
+                     << ".";
+    return false;
+  }
+  if (offset >= kSpecialDataSize) {
+    // NOT_FULL: special position 0 records the offset, position 1 is invalid.
+    SetSpecialData(posting_list_used, /*index=*/0, SpecialDataType(offset));
+    SetSpecialData(posting_list_used, /*index=*/1,
+                   SpecialDataType(JoinDataType::GetInvalid()));
+  } else if (offset == sizeof(JoinDataType)) {
+    // ALMOST_FULL: only special position 0 is marked invalid.
+    SetSpecialData(posting_list_used, /*index=*/0,
+                   SpecialDataType(JoinDataType::GetInvalid()));
+  }
+  // FULL (offset 0): nothing is written; the offset isn't stored anywhere.
+  return true;
+}
+
+template <typename JoinDataType>
+libtextclassifier3::StatusOr<uint32_t>
+PostingListJoinDataSerializer<JoinDataType>::PrependDataUncompressed(
+    PostingListUsed* posting_list_used, const JoinDataType& data,
+    uint32_t offset) const {
+  if (offset < kSpecialDataSize + sizeof(JoinDataType)) {  // Data region full.
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Not enough room to prepend JoinData at offset %u.", offset));
+  }
+  offset -= sizeof(JoinDataType);  // Start of the newly prepended data.
+  memcpy(posting_list_used->posting_list_buffer() + offset, &data,
+         sizeof(JoinDataType));
+  return offset;  // Callers receive the new start offset.
+}
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_JOIN_POSTING_LIST_JOIN_DATA_SERIALIZER_H_
diff --git a/icing/join/posting-list-join-data-serializer_test.cc b/icing/join/posting-list-join-data-serializer_test.cc
new file mode 100644
index 0000000..20137b6
--- /dev/null
+++ b/icing/join/posting-list-join-data-serializer_test.cc
@@ -0,0 +1,653 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/posting-list-join-data-serializer.h"
+
+#include <algorithm>
+#include <iterator>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/testing/common-matchers.h"
+
+using testing::ElementsAre;
+using testing::ElementsAreArray;
+using testing::Eq;
+using testing::IsEmpty;
+using testing::SizeIs;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitNotNull) {
+  // Exercises GetMinPostingListSizeToFit() on a posting list that keeps plenty
+  // of free space. The minimum size that fits the content is expected to be
+  // one data slot larger than the number of data stored so far, as the two
+  // checks below spell out.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  // Far larger than what the two data below need, so the posting list never
+  // gets close to full in this test.
+  int size = 2551 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // One data stored: room for two data is required.
+  JoinData data0(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+  const auto min_size_with_one_data =
+      serializer.GetMinPostingListSizeToFit(&pl_used);
+  EXPECT_THAT(min_size_with_one_data, Eq(2 * sizeof(JoinData)));
+
+  // Two data stored: room for three data is required.
+  JoinData data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+  const auto min_size_with_two_data =
+      serializer.GetMinPostingListSizeToFit(&pl_used);
+  EXPECT_THAT(min_size_with_two_data, Eq(3 * sizeof(JoinData)));
+}
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitAlmostFull) {
+  // Stores 2 data in a posting list sized for exactly 3 data, which per the
+  // test name leaves it in the ALMOST_FULL state. The posting list is then
+  // expected to already have the minimum size that fits its content.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  // Room for exactly three data.
+  int size = 3 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // Prepend two data in increasing document id order.
+  JoinData data0(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+  JoinData data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+
+  // No smaller posting list could hold the current content.
+  EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListJoinDataSerializerTest, GetMinPostingListSizeToFitFull) {
+  // Stores 3 data in a posting list sized for exactly 3 data, which per the
+  // test name leaves it in the FULL state. The posting list is then expected
+  // to already have the minimum size that fits its content, i.e. its own
+  // size.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  // Room for exactly three data.
+  int size = 3 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // Use up every slot, prepending in increasing document id order.
+  JoinData data0(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+
+  JoinData data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+
+  JoinData data2(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  ASSERT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+
+  // No smaller posting list could hold the current content.
+  EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataNotFull) {
+  // Prepends three data to a posting list with plenty of room and checks,
+  // after every step, the number of bytes used and that GetData() returns the
+  // data in reverse order of insertion.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = 2551 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // The first data makes the posting list used.
+  JoinData data0(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+  // Bytes used = sizeof(uncompressed data0)
+  int expected_size = sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+  // A second data goes in front of the first one.
+  JoinData data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+  // Bytes used = sizeof(uncompressed data1) + sizeof(uncompressed data0)
+  expected_size += sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used),
+              IsOkAndHolds(ElementsAre(data1, data0)));
+
+  // A third data goes in front of the other two.
+  JoinData data2(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+  // Bytes used = sizeof(uncompressed data2) + sizeof(uncompressed data1)
+  //              + sizeof(uncompressed data0)
+  expected_size += sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used),
+              IsOkAndHolds(ElementsAre(data2, data1, data0)));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataAlmostFull) {
+  // Walks a posting list with room for exactly 4 data through every state up
+  // to FULL, checking bytes used and content after each step, and finally
+  // checks that prepending to a FULL posting list is rejected.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = 4 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // Fill up the regular (non-special) data region.
+  // Transitions:
+  // Adding data0: EMPTY -> NOT_FULL
+  // Adding data1: NOT_FULL -> NOT_FULL
+  JoinData data0(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  JoinData data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+  EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+  int expected_size = 2 * sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used),
+              IsOkAndHolds(ElementsAre(data1, data0)));
+
+  // One more data: NOT_FULL -> ALMOST_FULL.
+  JoinData data2(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data2), IsOk());
+  expected_size = 3 * sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used),
+              IsOkAndHolds(ElementsAre(data2, data1, data0)));
+
+  // One more data: ALMOST_FULL -> FULL.
+  JoinData data3(
+      /*document_id=*/3,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data3), IsOk());
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(size));
+  EXPECT_THAT(serializer.GetData(&pl_used),
+              IsOkAndHolds(ElementsAre(data3, data2, data1, data0)));
+
+  // The posting list is FULL, so one more data must be rejected.
+  JoinData data4(
+      /*document_id=*/4,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/0, /*fingerprint=*/1234));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data4),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependSmallerDataShouldFail) {
+  // A data with a smaller document id than the last one must be rejected.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = 4 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  JoinData data(
+      /*document_id=*/100,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  JoinData smaller_data(
+      /*document_id=*/99,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+
+  // 1st `data`: EMPTY -> NOT_FULL
+  ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+  EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // 2nd `data`: NOT_FULL -> NOT_FULL
+  ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+  EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // 3rd `data`: NOT_FULL -> ALMOST_FULL
+  ASSERT_THAT(serializer.PrependData(&pl_used, data), IsOk());
+  EXPECT_THAT(serializer.PrependData(&pl_used, smaller_data),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataPostingListUsedMinSize) {
+  // As the checks below show, a posting list of the minimum size has room for
+  // two data only, so each successful prepend changes its state.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  // PL State: EMPTY
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
+  EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(IsEmpty()));
+
+  // Add a data. PL should shift to ALMOST_FULL state.
+  JoinData data0(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data0), IsOk());
+  // Bytes used = sizeof(uncompressed data0)
+  int expected_size = sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used), IsOkAndHolds(ElementsAre(data0)));
+
+  // Add another data. PL should shift to FULL state.
+  JoinData data1(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data1), IsOk());
+  // Bytes used = sizeof(uncompressed data1) + sizeof(uncompressed data0)
+  expected_size += sizeof(JoinData);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetData(&pl_used),
+              IsOkAndHolds(ElementsAre(data1, data0)));
+
+  // The posting list is FULL, so one more data must be rejected.
+  JoinData data2(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  EXPECT_THAT(serializer.PrependData(&pl_used, data2),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataArrayDoNotKeepPrepended) {
+  // With keep_prepended=false, PrependDataArray() is all-or-nothing: when the
+  // whole array does not fit, nothing is prepended and 0 is returned.
+  // data_pushed mirrors what the posting list is expected to contain.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = 6 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  std::vector<JoinData> data_in;
+  std::vector<JoinData> data_pushed;
+
+  // Add 3 data. The PL is in the empty state and should be able to fit all 3
+  // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+  data_in.emplace_back(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  data_in.emplace_back(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  data_in.emplace_back(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  EXPECT_THAT(
+      serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_in.size()));
+  std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+              Eq(data_pushed.size() * sizeof(JoinData)));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+  // Add 2 data. The PL should transition from NOT_FULL to ALMOST_FULL.
+  data_in.clear();
+  data_in.emplace_back(
+      /*document_id=*/3,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0));
+  data_in.emplace_back(
+      /*document_id=*/4,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/0, /*fingerprint=*/1234));
+  EXPECT_THAT(
+      serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_in.size()));
+  std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+              Eq(data_pushed.size() * sizeof(JoinData)));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+  // Try to add 2 data. Only 1 more data fits, and nothing may be prepended
+  // partially because keep_prepended is false. The PL should therefore remain
+  // ALMOST_FULL with its content unchanged.
+  data_in.clear();
+  data_in.emplace_back(
+      /*document_id=*/5,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/2, /*fingerprint=*/99));
+  data_in.emplace_back(
+      /*document_id=*/6,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/63));
+  EXPECT_THAT(
+      serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(0));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+              Eq(data_pushed.size() * sizeof(JoinData)));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+  // Add 1 data (the first of the two rejected above). The PL should transition
+  // from ALMOST_FULL to FULL.
+  data_in.pop_back();
+  ASSERT_THAT(data_in, SizeIs(1));
+  EXPECT_THAT(
+      serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_in.size()));
+  std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+              Eq(data_pushed.size() * sizeof(JoinData)));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, PrependDataArrayKeepPrepended) {
+  // With keep_prepended=true, PrependDataArray() prepends as many data as fit
+  // and returns that number, even when the whole array does not fit.
+  // data_pushed mirrors what the posting list is expected to contain.
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = 6 * sizeof(JoinData);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  std::vector<JoinData> data_in;
+  std::vector<JoinData> data_pushed;
+
+  // Add 3 data. The PL is in the empty state and should be able to fit all 3
+  // data without issue, transitioning the PL from EMPTY -> NOT_FULL.
+  data_in.emplace_back(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  data_in.emplace_back(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  data_in.emplace_back(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  EXPECT_THAT(
+      serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+                                  /*keep_prepended=*/true),
+      IsOkAndHolds(data_in.size()));
+  std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+              Eq(data_pushed.size() * sizeof(JoinData)));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+
+  // Add 4 data. The PL should prepend 3 data and transition from NOT_FULL to
+  // FULL.
+  data_in.clear();
+  data_in.emplace_back(
+      /*document_id=*/3,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0));
+  data_in.emplace_back(
+      /*document_id=*/4,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/0, /*fingerprint=*/1234));
+  data_in.emplace_back(
+      /*document_id=*/5,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/2, /*fingerprint=*/99));
+  data_in.emplace_back(
+      /*document_id=*/6,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/63));
+  EXPECT_THAT(
+      serializer.PrependDataArray(&pl_used, data_in.data(), data_in.size(),
+                                  /*keep_prepended=*/true),
+      IsOkAndHolds(3));
+  data_in.pop_back();
+  ASSERT_THAT(data_in, SizeIs(3));
+  std::move(data_in.begin(), data_in.end(), std::back_inserter(data_pushed));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used),
+              Eq(data_pushed.size() * sizeof(JoinData)));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveFrom) {
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+  PostingListJoinDataSerializer<JoinData> serializer;
+
+  int size = 3 * serializer.GetMinPostingListSize();
+
+  // The source posting list holds 2 data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used1,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+  std::vector<JoinData> data_arr1;
+  data_arr1.emplace_back(
+      /*document_id=*/0,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/2));
+  data_arr1.emplace_back(
+      /*document_id=*/1,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/5));
+  ASSERT_THAT(
+      serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_arr1.size()));
+
+  // The destination posting list holds 4 other data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used2,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+  std::vector<JoinData> data_arr2;
+  data_arr2.emplace_back(
+      /*document_id=*/2,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/10));
+  data_arr2.emplace_back(
+      /*document_id=*/3,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/1, /*fingerprint=*/0));
+  data_arr2.emplace_back(
+      /*document_id=*/4,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/0, /*fingerprint=*/1234));
+  data_arr2.emplace_back(
+      /*document_id=*/5,
+      NamespaceFingerprintIdentifier(/*namespace_id=*/2, /*fingerprint=*/99));
+  ASSERT_THAT(
+      serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_arr2.size()));
+
+  // Moving replaces the destination's content and empties the source.
+  EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+              IsOk());
+  EXPECT_THAT(
+      serializer.GetData(&pl_used2),
+      IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+  EXPECT_THAT(serializer.GetData(&pl_used1), IsOkAndHolds(IsEmpty()));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveToNullReturnsFailedPrecondition) {
+  PostingListJoinDataSerializer<
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+      serializer;
+
+  int size = 3 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+  std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr = {
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+          /*document_id=*/0, NamespaceFingerprintIdentifier(
+                                 /*namespace_id=*/1, /*fingerprint=*/2)),
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+          /*document_id=*/1, NamespaceFingerprintIdentifier(
+                                 /*namespace_id=*/1, /*fingerprint=*/5))};
+  ASSERT_THAT(
+      serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_arr.size()));
+
+  EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used, /*src=*/nullptr),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+  EXPECT_THAT(serializer.MoveFrom(/*dst=*/nullptr, /*src=*/&pl_used),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, MoveToPostingListTooSmall) {
+  PostingListJoinDataSerializer<
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+      serializer;
+
+  int size1 = 3 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used1,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size1));
+  std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr1 =
+      {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+           /*document_id=*/0, NamespaceFingerprintIdentifier(
+                                  /*namespace_id=*/1, /*fingerprint=*/2)),
+       DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+           /*document_id=*/1, NamespaceFingerprintIdentifier(
+                                  /*namespace_id=*/1, /*fingerprint=*/5)),
+       DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+           /*document_id=*/2, NamespaceFingerprintIdentifier(
+                                  /*namespace_id=*/1, /*fingerprint=*/10)),
+       DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+           /*document_id=*/3, NamespaceFingerprintIdentifier(
+                                  /*namespace_id=*/1, /*fingerprint=*/0)),
+       DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+           /*document_id=*/4,
+           NamespaceFingerprintIdentifier(/*namespace_id=*/0,
+                                          /*fingerprint=*/1234))};
+  ASSERT_THAT(
+      serializer.PrependDataArray(&pl_used1, data_arr1.data(), data_arr1.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_arr1.size()));
+
+  int size2 = serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used2,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size2));
+  std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr2 =
+      {DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+          /*document_id=*/5, NamespaceFingerprintIdentifier(
+                                 /*namespace_id=*/2, /*fingerprint=*/99))};
+  ASSERT_THAT(
+      serializer.PrependDataArray(&pl_used2, data_arr2.data(), data_arr2.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_arr2.size()));
+
+  EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used1),
+      IsOkAndHolds(ElementsAreArray(data_arr1.rbegin(), data_arr1.rend())));
+  EXPECT_THAT(
+      serializer.GetData(&pl_used2),
+      IsOkAndHolds(ElementsAreArray(data_arr2.rbegin(), data_arr2.rend())));
+}
+
+TEST(PostingListJoinDataSerializerTest, PopFrontData) {
+  PostingListJoinDataSerializer<
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>>
+      serializer;
+
+  int size = 2 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+
+  std::vector<DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>> data_arr = {
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+          /*document_id=*/0, NamespaceFingerprintIdentifier(
+                                 /*namespace_id=*/1, /*fingerprint=*/2)),
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+          /*document_id=*/1, NamespaceFingerprintIdentifier(
+                                 /*namespace_id=*/1, /*fingerprint=*/5)),
+      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+          /*document_id=*/2, NamespaceFingerprintIdentifier(
+                                 /*namespace_id=*/1, /*fingerprint=*/10))};
+  ASSERT_THAT(
+      serializer.PrependDataArray(&pl_used, data_arr.data(), data_arr.size(),
+                                  /*keep_prepended=*/false),
+      IsOkAndHolds(data_arr.size()));
+  ASSERT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+
+  // Now, pop the last data. The posting list should contain the first two
+  // data.
+  EXPECT_THAT(serializer.PopFrontData(&pl_used, /*num_data=*/1), IsOk());
+  data_arr.pop_back();
+  EXPECT_THAT(
+      serializer.GetData(&pl_used),
+      IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
+}
+
+}  // namespace
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/join/qualified-id-join-index.cc b/icing/join/qualified-id-join-index-impl-v1.cc
index 07b5627..cdcb5a9 100644
--- a/icing/join/qualified-id-join-index.cc
+++ b/icing/join/qualified-id-join-index-impl-v1.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
 
 #include <cstring>
 #include <memory>
@@ -29,9 +29,11 @@
 #include "icing/file/filesystem.h"
 #include "icing/file/memory-mapped-file.h"
 #include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
 #include "icing/store/document-id.h"
 #include "icing/store/dynamic-trie-key-mapper.h"
 #include "icing/store/key-mapper.h"
+#include "icing/store/namespace-id.h"
 #include "icing/store/persistent-hash-map-key-mapper.h"
 #include "icing/util/crc32.h"
 #include "icing/util/encode-util.h"
@@ -74,17 +76,20 @@ std::string GetQualifiedIdStoragePath(std::string_view working_path) {
 
 }  // namespace
 
-/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-QualifiedIdJoinIndex::Create(const Filesystem& filesystem,
-                             std::string working_path, bool pre_mapping_fbv,
-                             bool use_persistent_hash_map) {
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::Create(const Filesystem& filesystem,
+                                   std::string working_path,
+                                   bool pre_mapping_fbv,
+                                   bool use_persistent_hash_map) {
   if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) ||
       !filesystem.DirectoryExists(
           GetDocJoinInfoMapperPath(working_path).c_str()) ||
       !filesystem.FileExists(GetQualifiedIdStoragePath(working_path).c_str())) {
     // Discard working_path if any file/directory is missing, and reinitialize.
     if (filesystem.DirectoryExists(working_path.c_str())) {
-      ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
+      ICING_RETURN_IF_ERROR(
+          QualifiedIdJoinIndex::Discard(filesystem, working_path));
     }
     return InitializeNewFiles(filesystem, std::move(working_path),
                               pre_mapping_fbv, use_persistent_hash_map);
@@ -93,7 +98,7 @@ QualifiedIdJoinIndex::Create(const Filesystem& filesystem,
                                  pre_mapping_fbv, use_persistent_hash_map);
 }
 
-QualifiedIdJoinIndex::~QualifiedIdJoinIndex() {
+QualifiedIdJoinIndexImplV1::~QualifiedIdJoinIndexImplV1() {
   if (!PersistToDisk().ok()) {
     ICING_LOG(WARNING) << "Failed to persist qualified id type joinable index "
                           "to disk while destructing "
@@ -101,7 +106,7 @@ QualifiedIdJoinIndex::~QualifiedIdJoinIndex() {
   }
 }
 
-libtextclassifier3::Status QualifiedIdJoinIndex::Put(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Put(
     const DocJoinInfo& doc_join_info, std::string_view ref_qualified_id_str) {
   SetDirty();
 
@@ -128,7 +133,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Put(
   return libtextclassifier3::Status::OK;
 }
 
-libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndex::Get(
+libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndexImplV1::Get(
     const DocJoinInfo& doc_join_info) const {
   if (!doc_join_info.is_valid()) {
     return absl_ports::InvalidArgumentError(
@@ -144,11 +149,13 @@ libtextclassifier3::StatusOr<std::string_view> QualifiedIdJoinIndex::Get(
   return std::string_view(data, strlen(data));
 }
 
-libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Optimize(
     const std::vector<DocumentId>& document_id_old_to_new,
+    const std::vector<NamespaceId>& namespace_id_old_to_new,
     DocumentId new_last_added_document_id) {
   std::string temp_working_path = working_path_ + "_temp";
-  ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
+  ICING_RETURN_IF_ERROR(
+      QualifiedIdJoinIndex::Discard(filesystem_, temp_working_path));
 
   DestructibleDirectory temp_working_path_ddir(&filesystem_,
                                                std::move(temp_working_path));
@@ -162,9 +169,10 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
     // Transfer all data from the current to new qualified id type joinable
     // index. Also PersistToDisk and destruct the instance after finishing, so
     // we can safely swap directories later.
-    ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndex> new_index,
-                           Create(filesystem_, temp_working_path_ddir.dir(),
-                                  pre_mapping_fbv_, use_persistent_hash_map_));
+    ICING_ASSIGN_OR_RETURN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> new_index,
+        Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_,
+               use_persistent_hash_map_));
     ICING_RETURN_IF_ERROR(
         TransferIndex(document_id_old_to_new, new_index.get()));
     new_index->set_last_added_document_id(new_last_added_document_id);
@@ -216,7 +224,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
   return libtextclassifier3::Status::OK;
 }
 
-libtextclassifier3::Status QualifiedIdJoinIndex::Clear() {
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::Clear() {
   SetDirty();
 
   doc_join_info_mapper_.reset();
@@ -252,11 +260,12 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Clear() {
   return libtextclassifier3::Status::OK;
 }
 
-/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
-                                         std::string&& working_path,
-                                         bool pre_mapping_fbv,
-                                         bool use_persistent_hash_map) {
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::InitializeNewFiles(const Filesystem& filesystem,
+                                               std::string&& working_path,
+                                               bool pre_mapping_fbv,
+                                               bool use_persistent_hash_map) {
   // Create working directory.
   if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
     return absl_ports::InternalError(
@@ -291,8 +300,8 @@ QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
           /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
 
   // Create instance.
-  auto new_index =
-      std::unique_ptr<QualifiedIdJoinIndex>(new QualifiedIdJoinIndex(
+  auto new_index = std::unique_ptr<QualifiedIdJoinIndexImplV1>(
+      new QualifiedIdJoinIndexImplV1(
           filesystem, std::move(working_path),
           /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
           std::move(doc_join_info_mapper), std::move(qualified_id_storage),
@@ -307,11 +316,11 @@ QualifiedIdJoinIndex::InitializeNewFiles(const Filesystem& filesystem,
   return new_index;
 }
 
-/* static */ libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
-                                              std::string&& working_path,
-                                              bool pre_mapping_fbv,
-                                              bool use_persistent_hash_map) {
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+QualifiedIdJoinIndexImplV1::InitializeExistingFiles(
+    const Filesystem& filesystem, std::string&& working_path,
+    bool pre_mapping_fbv, bool use_persistent_hash_map) {
   // PRead metadata file.
   auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
   if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
@@ -358,8 +367,8 @@ QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
           /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0));
 
   // Create instance.
-  auto type_joinable_index =
-      std::unique_ptr<QualifiedIdJoinIndex>(new QualifiedIdJoinIndex(
+  auto type_joinable_index = std::unique_ptr<QualifiedIdJoinIndexImplV1>(
+      new QualifiedIdJoinIndexImplV1(
           filesystem, std::move(working_path), std::move(metadata_buffer),
           std::move(doc_join_info_mapper), std::move(qualified_id_storage),
           pre_mapping_fbv, use_persistent_hash_map));
@@ -374,9 +383,9 @@ QualifiedIdJoinIndex::InitializeExistingFiles(const Filesystem& filesystem,
   return type_joinable_index;
 }
 
-libtextclassifier3::Status QualifiedIdJoinIndex::TransferIndex(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::TransferIndex(
     const std::vector<DocumentId>& document_id_old_to_new,
-    QualifiedIdJoinIndex* new_index) const {
+    QualifiedIdJoinIndexImplV1* new_index) const {
   std::unique_ptr<KeyMapper<int32_t>::Iterator> iter =
       doc_join_info_mapper_->GetIterator();
   while (iter->Advance()) {
@@ -404,7 +413,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::TransferIndex(
   return libtextclassifier3::Status::OK;
 }
 
-libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::PersistMetadataToDisk(
     bool force) {
   if (!force && !is_info_dirty() && !is_storage_dirty()) {
     return libtextclassifier3::Status::OK;
@@ -429,7 +438,7 @@ libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk(
   return libtextclassifier3::Status::OK;
 }
 
-libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk(
+libtextclassifier3::Status QualifiedIdJoinIndexImplV1::PersistStoragesToDisk(
     bool force) {
   if (!force && !is_storage_dirty()) {
     return libtextclassifier3::Status::OK;
@@ -440,8 +449,8 @@ libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk(
   return libtextclassifier3::Status::OK;
 }
 
-libtextclassifier3::StatusOr<Crc32> QualifiedIdJoinIndex::ComputeInfoChecksum(
-    bool force) {
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV1::ComputeInfoChecksum(bool force) {
   if (!force && !is_info_dirty()) {
     return Crc32(crcs().component_crcs.info_crc);
   }
@@ -450,7 +459,7 @@ libtextclassifier3::StatusOr<Crc32> QualifiedIdJoinIndex::ComputeInfoChecksum(
 }
 
 libtextclassifier3::StatusOr<Crc32>
-QualifiedIdJoinIndex::ComputeStoragesChecksum(bool force) {
+QualifiedIdJoinIndexImplV1::ComputeStoragesChecksum(bool force) {
   if (!force && !is_storage_dirty()) {
     return Crc32(crcs().component_crcs.storages_crc);
   }
diff --git a/icing/join/qualified-id-join-index-impl-v1.h b/icing/join/qualified-id-join-index-impl-v1.h
new file mode 100644
index 0000000..9314602
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v1.h
@@ -0,0 +1,327 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndexImplV1: a class to maintain data mapping DocJoinInfo to
+// joinable qualified ids and delete propagation info.
+class QualifiedIdJoinIndexImplV1 : public QualifiedIdJoinIndex {
+ public:
+  struct Info {
+    static constexpr int32_t kMagic = 0x48cabdc6;
+
+    int32_t magic;
+    DocumentId last_added_document_id;
+
+    Crc32 ComputeChecksum() const {
+      return Crc32(
+          std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+    }
+  } __attribute__((packed));
+  static_assert(sizeof(Info) == 8, "");
+
+  // Metadata file layout: <Crcs><Info>
+  static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+  static constexpr int32_t kInfoMetadataBufferOffset =
+      static_cast<int32_t>(sizeof(Crcs));
+  static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+  static_assert(kMetadataFileSize == 20, "");
+
+  // Creates a QualifiedIdJoinIndexImplV1 instance to store qualified ids for
+  // future joining search. If any of the underlying file is missing, then
+  // delete the whole working_path and (re)initialize with new ones. Otherwise
+  // initialize and create the instance by existing files.
+  //
+  // filesystem: Object to make system level calls
+  // working_path: Specifies the working path for PersistentStorage.
+  //               QualifiedIdJoinIndexImplV1 uses working path as working
+  //               directory and all related files will be stored under this
+  //               directory. It takes full ownership of working_path_,
+  //               including creation/deletion. It is the caller's
+  //               responsibility to specify correct working path and avoid
+  //               mixing different persistent storages together under the same
+  //               path. Also the caller has the ownership for the parent
+  //               directory of working_path_, and it is responsible for parent
+  //               directory creation/deletion. See PersistentStorage for more
+  //               details about the concept of working_path.
+  // pre_mapping_fbv: flag indicating whether to memory map the max possible
+  //                  file size for underlying FileBackedVector before growing
+  //                  the actual file size.
+  // use_persistent_hash_map: flag indicating whether to use persistent hash
+  //                          map as the key mapper (if false, then fall back
+  //                          to dynamic trie key mapper).
+  //
+  // Returns:
+  //   - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+  //                               checksum
+  //   - INTERNAL_ERROR on I/O errors
+  //   - Any KeyMapper errors
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+  Create(const Filesystem& filesystem, std::string working_path,
+         bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+  // Delete copy and move constructor/assignment operator.
+  QualifiedIdJoinIndexImplV1(const QualifiedIdJoinIndexImplV1&) = delete;
+  QualifiedIdJoinIndexImplV1& operator=(const QualifiedIdJoinIndexImplV1&) =
+      delete;
+
+  QualifiedIdJoinIndexImplV1(QualifiedIdJoinIndexImplV1&&) = delete;
+  QualifiedIdJoinIndexImplV1& operator=(QualifiedIdJoinIndexImplV1&&) = delete;
+
+  ~QualifiedIdJoinIndexImplV1() override;
+
+  // v2 only API. Always returns UNIMPLEMENTED_ERROR in this v1 impl.
+  libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
+                                 JoinablePropertyId joinable_property_id,
+                                 DocumentId document_id,
+                                 std::vector<NamespaceFingerprintIdentifier>&&
+                                     ref_namespace_fingerprint_ids) override {
+    return absl_ports::UnimplementedError("This API is not supported in V1");
+  }
+
+  // v2 only API. Always returns UNIMPLEMENTED_ERROR in this v1 impl.
+  libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+  GetIterator(SchemaTypeId schema_type_id,
+              JoinablePropertyId joinable_property_id) const override {
+    return absl_ports::UnimplementedError("This API is not supported in V1");
+  }
+
+  // Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
+  // references to ref_qualified_id_str (the identifier of another document).
+  //
+  // REQUIRES: ref_qualified_id_str contains no '\0'.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+  //   - Any KeyMapper errors
+  libtextclassifier3::Status Put(
+      const DocJoinInfo& doc_join_info,
+      std::string_view ref_qualified_id_str) override;
+
+  // Gets the referenced document's qualified id string by DocJoinInfo.
+  //
+  // Returns:
+  //   - A qualified id string referenced by the given DocJoinInfo (DocumentId,
+  //     JoinablePropertyId) on success
+  //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
+  //   - NOT_FOUND_ERROR if doc_join_info doesn't exist
+  //   - Any KeyMapper errors
+  libtextclassifier3::StatusOr<std::string_view> Get(
+      const DocJoinInfo& doc_join_info) const override;
+
+  // Reduces internal file sizes by reclaiming space and ids of deleted
+  // documents. Qualified id type joinable index will convert all entries to the
+  // new document ids.
+  //
+  // - document_id_old_to_new: a map for converting old document id to new
+  //   document id.
+  // - namespace_id_old_to_new: a map for converting old namespace id to new
+  //   namespace id. It is unused in this implementation since we store raw
+  //   qualified id string (which contains raw namespace string).
+  // - new_last_added_document_id: will be used to update the last added
+  //                               document id in the qualified id type joinable
+  //                               index.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+  //     an invalid state and the caller should handle it properly (e.g. discard
+  //     and rebuild)
+  libtextclassifier3::Status Optimize(
+      const std::vector<DocumentId>& document_id_old_to_new,
+      const std::vector<NamespaceId>& namespace_id_old_to_new,
+      DocumentId new_last_added_document_id) override;
+
+  // Clears all data and set last_added_document_id to kInvalidDocumentId.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status Clear() override;
+
+  bool is_v2() const override { return false; }
+
+  int32_t size() const override { return doc_join_info_mapper_->num_keys(); }
+
+  bool empty() const override { return size() == 0; }
+
+  DocumentId last_added_document_id() const override {
+    return info().last_added_document_id;
+  }
+
+  void set_last_added_document_id(DocumentId document_id) override {
+    SetInfoDirty();
+
+    Info& info_ref = info();
+    if (info_ref.last_added_document_id == kInvalidDocumentId ||
+        document_id > info_ref.last_added_document_id) {
+      info_ref.last_added_document_id = document_id;
+    }
+  }
+
+ private:
+  explicit QualifiedIdJoinIndexImplV1(
+      const Filesystem& filesystem, std::string&& working_path,
+      std::unique_ptr<uint8_t[]> metadata_buffer,
+      std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
+      std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
+      bool pre_mapping_fbv, bool use_persistent_hash_map)
+      : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
+        metadata_buffer_(std::move(metadata_buffer)),
+        doc_join_info_mapper_(std::move(doc_join_info_mapper)),
+        qualified_id_storage_(std::move(qualified_id_storage)),
+        pre_mapping_fbv_(pre_mapping_fbv),
+        use_persistent_hash_map_(use_persistent_hash_map),
+        is_info_dirty_(false),
+        is_storage_dirty_(false) {}
+
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+  InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+                     bool pre_mapping_fbv, bool use_persistent_hash_map);
+
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<QualifiedIdJoinIndexImplV1>>
+  InitializeExistingFiles(const Filesystem& filesystem,
+                          std::string&& working_path, bool pre_mapping_fbv,
+                          bool use_persistent_hash_map);
+
+  // Transfers qualified id join index data from the current to new_index and
+  // convert to new document id according to document_id_old_to_new. It is a
+  // helper function for Optimize.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status TransferIndex(
+      const std::vector<DocumentId>& document_id_old_to_new,
+      QualifiedIdJoinIndexImplV1* new_index) const;
+
+  // Flushes contents of metadata file.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+  // Flushes contents of all storages to underlying files.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+  // Computes and returns Info checksum.
+  //
+  // Returns:
+  //   - Crc of the Info on success
+  libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+  // Computes and returns all storages checksum.
+  //
+  // Returns:
+  //   - Crc of all storages on success
+  //   - INTERNAL_ERROR if any data inconsistency
+  libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+      bool force) override;
+
+  Crcs& crcs() override {
+    return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+                                    kCrcsMetadataBufferOffset);
+  }
+
+  const Crcs& crcs() const override {
+    return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+                                          kCrcsMetadataBufferOffset);
+  }
+
+  Info& info() {
+    return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+                                    kInfoMetadataBufferOffset);
+  }
+
+  const Info& info() const {
+    return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+                                          kInfoMetadataBufferOffset);
+  }
+
+  void SetInfoDirty() { is_info_dirty_ = true; }
+  // When storage is dirty, we have to set info dirty as well. So just expose
+  // SetDirty to set both.
+  void SetDirty() {
+    is_info_dirty_ = true;
+    is_storage_dirty_ = true;
+  }
+
+  bool is_info_dirty() const { return is_info_dirty_; }
+  bool is_storage_dirty() const { return is_storage_dirty_; }
+
+  // Metadata buffer
+  std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+  // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
+  // JoinablePropertyId) to another referenced document's qualified id string
+  // index in qualified_id_storage_.
+  std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+
+  // Storage for qualified id strings.
+  std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
+
+  // TODO(b/268521214): add delete propagation storage
+
+  // Flag indicating whether memory map max possible file size for underlying
+  // FileBackedVector before growing the actual file size.
+  bool pre_mapping_fbv_;
+
+  // Flag indicating whether use persistent hash map as the key mapper (if
+  // false, then fall back to dynamic trie key mapper).
+  bool use_persistent_hash_map_;
+
+  bool is_info_dirty_;
+  bool is_storage_dirty_;
+};
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
diff --git a/icing/join/qualified-id-join-index_test.cc b/icing/join/qualified-id-join-index-impl-v1_test.cc
index 3d59f4b..a6e19bb 100644
--- a/icing/join/qualified-id-join-index_test.cc
+++ b/icing/join/qualified-id-join-index-impl-v1_test.cc
@@ -12,8 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
 
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <string_view>
@@ -49,22 +50,22 @@ using ::testing::Pointee;
 using ::testing::SizeIs;
 
 using Crcs = PersistentStorage::Crcs;
-using Info = QualifiedIdJoinIndex::Info;
+using Info = QualifiedIdJoinIndexImplV1::Info;
 
 static constexpr int32_t kCorruptedValueOffset = 3;
 
-struct QualifiedIdJoinIndexTestParam {
+struct QualifiedIdJoinIndexImplV1TestParam {
   bool pre_mapping_fbv;
   bool use_persistent_hash_map;
 
-  explicit QualifiedIdJoinIndexTestParam(bool pre_mapping_fbv_in,
-                                         bool use_persistent_hash_map_in)
+  explicit QualifiedIdJoinIndexImplV1TestParam(bool pre_mapping_fbv_in,
+                                               bool use_persistent_hash_map_in)
       : pre_mapping_fbv(pre_mapping_fbv_in),
         use_persistent_hash_map(use_persistent_hash_map_in) {}
 };
 
-class QualifiedIdJoinIndexTest
-    : public ::testing::TestWithParam<QualifiedIdJoinIndexTestParam> {
+class QualifiedIdJoinIndexImplV1Test
+    : public ::testing::TestWithParam<QualifiedIdJoinIndexImplV1TestParam> {
  protected:
   void SetUp() override {
     base_dir_ = GetTestTempDir() + "/icing";
@@ -83,26 +84,26 @@ class QualifiedIdJoinIndexTest
   std::string working_path_;
 };
 
-TEST_P(QualifiedIdJoinIndexTest, InvalidWorkingPath) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidWorkingPath) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(
                   filesystem_, "/dev/null/qualified_id_join_index_test",
                   param.pre_mapping_fbv, param.use_persistent_hash_map),
               StatusIs(libtextclassifier3::StatusCode::INTERNAL));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InitializeNewFiles) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     EXPECT_THAT(index, Pointee(IsEmpty()));
 
     ICING_ASSERT_OK(index->PersistToDisk());
@@ -112,23 +113,25 @@ TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
   // sections.
   const std::string metadata_file_path =
       absl_ports::StrCat(working_path_, "/metadata");
-  auto metadata_buffer =
-      std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+  auto metadata_buffer = std::make_unique<uint8_t[]>(
+      QualifiedIdJoinIndexImplV1::kMetadataFileSize);
   ASSERT_THAT(
       filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
-                        QualifiedIdJoinIndex::kMetadataFileSize,
+                        QualifiedIdJoinIndexImplV1::kMetadataFileSize,
                         /*offset=*/0),
       IsTrue());
 
   // Check info section
   const Info* info = reinterpret_cast<const Info*>(
-      metadata_buffer.get() + QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+      metadata_buffer.get() +
+      QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
   EXPECT_THAT(info->magic, Eq(Info::kMagic));
   EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
 
   // Check crcs section
   const Crcs* crcs = reinterpret_cast<const Crcs*>(
-      metadata_buffer.get() + QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+      metadata_buffer.get() +
+      QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
   // There are some initial info in KeyMapper, so storages_crc should be
   // non-zero.
   EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
@@ -143,16 +146,16 @@ TEST_P(QualifiedIdJoinIndexTest, InitializeNewFiles) {
                      .Get()));
 }
 
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
        InitializationShouldFailWithoutPersistToDiskOrDestruction) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   // Create new qualified id join index
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   // Insert some data.
   ICING_ASSERT_OK(
@@ -168,23 +171,24 @@ TEST_P(QualifiedIdJoinIndexTest,
 
   // Without calling PersistToDisk, checksums will not be recomputed or synced
   // to disk, so initializing another instance on the same files should fail.
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           param.use_persistent_hash_map),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 param.use_persistent_hash_map),
               StatusIs(param.use_persistent_hash_map
                            ? libtextclassifier3::StatusCode::FAILED_PRECONDITION
                            : libtextclassifier3::StatusCode::INTERNAL));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+       InitializationShouldSucceedWithPersistToDisk) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   // Create new qualified id join index
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index1,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index1,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   // Insert some data.
   ICING_ASSERT_OK(
@@ -204,10 +208,10 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
   ICING_EXPECT_OK(index1->PersistToDisk());
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index2,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index2,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
   EXPECT_THAT(index2, Pointee(SizeIs(3)));
   EXPECT_THAT(
       index2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)),
@@ -220,16 +224,17 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedWithPersistToDisk) {
       IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriC"));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test,
+       InitializationShouldSucceedAfterDestruction) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
 
     // Insert some data.
     ICING_ASSERT_OK(
@@ -250,10 +255,10 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
     // thus initializing another instance on the same files should succeed, and
     // we should be able to get the same contents.
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     EXPECT_THAT(index, Pointee(SizeIs(3)));
     EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/1,
                                        /*joinable_property_id=*/20)),
@@ -267,17 +272,17 @@ TEST_P(QualifiedIdJoinIndexTest, InitializationShouldSucceedAfterDestruction) {
   }
 }
 
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
        InitializeExistingFilesWithDifferentMagicShouldFail) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     ICING_ASSERT_OK(
         index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
                    /*ref_qualified_id_str=*/"namespace#uriA"));
@@ -292,49 +297,49 @@ TEST_P(QualifiedIdJoinIndexTest,
     ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
     ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
 
-    auto metadata_buffer =
-        std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV1::kMetadataFileSize);
     ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
-                                  QualifiedIdJoinIndex::kMetadataFileSize,
+                                  QualifiedIdJoinIndexImplV1::kMetadataFileSize,
                                   /*offset=*/0),
                 IsTrue());
 
     // Manually change magic and update checksums.
     Crcs* crcs = reinterpret_cast<Crcs*>(
         metadata_buffer.get() +
-        QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+        QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
     Info* info = reinterpret_cast<Info*>(
         metadata_buffer.get() +
-        QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+        QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
     info->magic += kCorruptedValueOffset;
     crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
     crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
-    ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
-                                   metadata_buffer.get(),
-                                   QualifiedIdJoinIndex::kMetadataFileSize),
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV1::kMetadataFileSize),
                 IsTrue());
   }
 
   // Attempt to create the qualified id join index with different magic. This
   // should fail.
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           param.use_persistent_hash_map),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 param.use_persistent_hash_map),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
                        HasSubstr("Incorrect magic value")));
 }
 
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
        InitializeExistingFilesWithWrongAllCrcShouldFail) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     ICING_ASSERT_OK(
         index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
                    /*ref_qualified_id_str=*/"namespace#uriA"));
@@ -348,45 +353,45 @@ TEST_P(QualifiedIdJoinIndexTest,
     ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
     ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
 
-    auto metadata_buffer =
-        std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV1::kMetadataFileSize);
     ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
-                                  QualifiedIdJoinIndex::kMetadataFileSize,
+                                  QualifiedIdJoinIndexImplV1::kMetadataFileSize,
                                   /*offset=*/0),
                 IsTrue());
 
     // Manually corrupt all_crc
     Crcs* crcs = reinterpret_cast<Crcs*>(
         metadata_buffer.get() +
-        QualifiedIdJoinIndex::kCrcsMetadataBufferOffset);
+        QualifiedIdJoinIndexImplV1::kCrcsMetadataBufferOffset);
     crcs->all_crc += kCorruptedValueOffset;
 
-    ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
-                                   metadata_buffer.get(),
-                                   QualifiedIdJoinIndex::kMetadataFileSize),
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV1::kMetadataFileSize),
                 IsTrue());
   }
 
   // Attempt to create the qualified id join index with metadata containing
   // corrupted all_crc. This should fail.
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           param.use_persistent_hash_map),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 param.use_persistent_hash_map),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
                        HasSubstr("Invalid all crc")));
 }
 
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
        InitializeExistingFilesWithCorruptedInfoShouldFail) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     ICING_ASSERT_OK(
         index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
                    /*ref_qualified_id_str=*/"namespace#uriA"));
@@ -400,10 +405,10 @@ TEST_P(QualifiedIdJoinIndexTest,
     ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
     ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
 
-    auto metadata_buffer =
-        std::make_unique<uint8_t[]>(QualifiedIdJoinIndex::kMetadataFileSize);
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV1::kMetadataFileSize);
     ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
-                                  QualifiedIdJoinIndex::kMetadataFileSize,
+                                  QualifiedIdJoinIndexImplV1::kMetadataFileSize,
                                   /*offset=*/0),
                 IsTrue());
 
@@ -411,35 +416,35 @@ TEST_P(QualifiedIdJoinIndexTest,
     // corruption of info.
     Info* info = reinterpret_cast<Info*>(
         metadata_buffer.get() +
-        QualifiedIdJoinIndex::kInfoMetadataBufferOffset);
+        QualifiedIdJoinIndexImplV1::kInfoMetadataBufferOffset);
     info->last_added_document_id += kCorruptedValueOffset;
 
-    ASSERT_THAT(filesystem_.PWrite(metadata_sfd.get(), /*offset=*/0,
-                                   metadata_buffer.get(),
-                                   QualifiedIdJoinIndex::kMetadataFileSize),
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV1::kMetadataFileSize),
                 IsTrue());
   }
 
   // Attempt to create the qualified id join index with info that doesn't match
   // its checksum. This should fail.
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           param.use_persistent_hash_map),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 param.use_persistent_hash_map),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
                        HasSubstr("Invalid info crc")));
 }
 
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
        InitializeExistingFilesWithCorruptedDocJoinInfoMapperShouldFail) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     ICING_ASSERT_OK(
         index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
                    /*ref_qualified_id_str=*/"namespace#uriA"));
@@ -472,24 +477,24 @@ TEST_P(QualifiedIdJoinIndexTest,
 
   // Attempt to create the qualified id join index with corrupted
   // doc_join_info_mapper. This should fail.
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           param.use_persistent_hash_map),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 param.use_persistent_hash_map),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
                        HasSubstr("Invalid storages crc")));
 }
 
-TEST_P(QualifiedIdJoinIndexTest,
+TEST_P(QualifiedIdJoinIndexImplV1Test,
        InitializeExistingFilesWithCorruptedQualifiedIdStorageShouldFail) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     ICING_ASSERT_OK(
         index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
                    /*ref_qualified_id_str=*/"namespace#uriA"));
@@ -518,22 +523,22 @@ TEST_P(QualifiedIdJoinIndexTest,
 
   // Attempt to create the qualified id join index with corrupted
   // qualified_id_storage. This should fail.
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           param.use_persistent_hash_map),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 param.use_persistent_hash_map),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
                        HasSubstr("Invalid storages crc")));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, InvalidPut) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidPut) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   // Create new qualified id join index
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   DocJoinInfo default_invalid;
   EXPECT_THAT(
@@ -541,23 +546,23 @@ TEST_P(QualifiedIdJoinIndexTest, InvalidPut) {
       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, InvalidGet) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, InvalidGet) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   // Create new qualified id join index
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   DocJoinInfo default_invalid;
   EXPECT_THAT(index->Get(default_invalid),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, PutAndGet) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
   std::string_view ref_qualified_id_str_a = "namespace#uriA";
@@ -571,10 +576,10 @@ TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
 
     EXPECT_THAT(index->Put(target_info1, ref_qualified_id_str_a), IsOk());
     EXPECT_THAT(index->Put(target_info2, ref_qualified_id_str_b), IsOk());
@@ -590,28 +595,28 @@ TEST_P(QualifiedIdJoinIndexTest, PutAndGet) {
 
   // Verify we can get all of them after destructing and re-initializing.
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
   EXPECT_THAT(index, Pointee(SizeIs(3)));
   EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a));
   EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b));
   EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, GetShouldReturnNotFoundErrorIfNotExist) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, GetShouldReturnNotFoundErrorIfNotExist) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20);
   std::string_view ref_qualified_id_str = "namespace#uriA";
 
   // Create new qualified id join index
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   // Verify entry is not found in the beginning.
   EXPECT_THAT(index->Get(target_info),
@@ -627,14 +632,14 @@ TEST_P(QualifiedIdJoinIndexTest, GetShouldReturnNotFoundErrorIfNotExist) {
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, SetLastAddedDocumentId) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, SetLastAddedDocumentId) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
 
@@ -648,15 +653,15 @@ TEST_P(QualifiedIdJoinIndexTest, SetLastAddedDocumentId) {
 }
 
 TEST_P(
-    QualifiedIdJoinIndexTest,
+    QualifiedIdJoinIndexImplV1Test,
     SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   constexpr DocumentId kDocumentId = 123;
   index->set_last_added_document_id(kDocumentId);
@@ -669,14 +674,14 @@ TEST_P(
   EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, Optimize) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, Optimize) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   ICING_ASSERT_OK(
       index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
@@ -705,7 +710,8 @@ TEST_P(QualifiedIdJoinIndexTest, Optimize) {
 
   DocumentId new_last_added_document_id = 2;
   EXPECT_THAT(
-      index->Optimize(document_id_old_to_new, new_last_added_document_id),
+      index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
+                      new_last_added_document_id),
       IsOk());
   EXPECT_THAT(index, Pointee(SizeIs(3)));
   EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
@@ -750,14 +756,14 @@ TEST_P(QualifiedIdJoinIndexTest, Optimize) {
               IsOkAndHolds("namespace#uriD"));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, OptimizeOutOfRangeDocumentId) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   ICING_ASSERT_OK(
       index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/10),
@@ -770,7 +776,7 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
 
   // There shouldn't be any error due to vector index.
   EXPECT_THAT(
-      index->Optimize(document_id_old_to_new,
+      index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
                       /*new_last_added_document_id=*/kInvalidDocumentId),
       IsOk());
   EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -779,14 +785,14 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeOutOfRangeDocumentId) {
   EXPECT_THAT(index, Pointee(IsEmpty()));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, OptimizeDeleteAll) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
 
   ICING_ASSERT_OK(
       index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10),
@@ -809,7 +815,7 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
   std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
 
   EXPECT_THAT(
-      index->Optimize(document_id_old_to_new,
+      index->Optimize(document_id_old_to_new, /*namespace_id_old_to_new=*/{},
                       /*new_last_added_document_id=*/kInvalidDocumentId),
       IsOk());
   EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -818,8 +824,8 @@ TEST_P(QualifiedIdJoinIndexTest, OptimizeDeleteAll) {
   EXPECT_THAT(index, Pointee(IsEmpty()));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, Clear) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, Clear) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20);
   DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/5);
@@ -827,10 +833,10 @@ TEST_P(QualifiedIdJoinIndexTest, Clear) {
 
   // Create new qualified id join index
   ICING_ASSERT_OK_AND_ASSIGN(
-      std::unique_ptr<QualifiedIdJoinIndex> index,
-      QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                   param.pre_mapping_fbv,
-                                   param.use_persistent_hash_map));
+      std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+      QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                         param.pre_mapping_fbv,
+                                         param.use_persistent_hash_map));
   ICING_ASSERT_OK(
       index->Put(target_info1, /*ref_qualified_id_str=*/"namespace#uriA"));
   ICING_ASSERT_OK(
@@ -867,9 +873,9 @@ TEST_P(QualifiedIdJoinIndexTest, Clear) {
 
   // Verify index after reconstructing.
   ICING_ASSERT_OK_AND_ASSIGN(
-      index, QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                          param.pre_mapping_fbv,
-                                          param.use_persistent_hash_map));
+      index, QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                param.pre_mapping_fbv,
+                                                param.use_persistent_hash_map));
   EXPECT_THAT(index->last_added_document_id(), Eq(2));
   EXPECT_THAT(index->Get(target_info1),
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -880,16 +886,16 @@ TEST_P(QualifiedIdJoinIndexTest, Clear) {
   EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD"));
 }
 
-TEST_P(QualifiedIdJoinIndexTest, SwitchKeyMapperTypeShouldReturnError) {
-  const QualifiedIdJoinIndexTestParam& param = GetParam();
+TEST_P(QualifiedIdJoinIndexImplV1Test, SwitchKeyMapperTypeShouldReturnError) {
+  const QualifiedIdJoinIndexImplV1TestParam& param = GetParam();
 
   {
     // Create new qualified id join index
     ICING_ASSERT_OK_AND_ASSIGN(
-        std::unique_ptr<QualifiedIdJoinIndex> index,
-        QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                     param.pre_mapping_fbv,
-                                     param.use_persistent_hash_map));
+        std::unique_ptr<QualifiedIdJoinIndexImplV1> index,
+        QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                           param.pre_mapping_fbv,
+                                           param.use_persistent_hash_map));
     ICING_ASSERT_OK(
         index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20),
                    /*ref_qualified_id_str=*/"namespace#uriA"));
@@ -898,23 +904,26 @@ TEST_P(QualifiedIdJoinIndexTest, SwitchKeyMapperTypeShouldReturnError) {
   }
 
   bool switch_key_mapper_flag = !param.use_persistent_hash_map;
-  EXPECT_THAT(QualifiedIdJoinIndex::Create(filesystem_, working_path_,
-                                           param.pre_mapping_fbv,
-                                           switch_key_mapper_flag),
+  EXPECT_THAT(QualifiedIdJoinIndexImplV1::Create(filesystem_, working_path_,
+                                                 param.pre_mapping_fbv,
+                                                 switch_key_mapper_flag),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
 }
 
 INSTANTIATE_TEST_SUITE_P(
-    QualifiedIdJoinIndexTest, QualifiedIdJoinIndexTest,
-    testing::Values(
-        QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true,
-                                      /*use_persistent_hash_map_in=*/true),
-        QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true,
-                                      /*use_persistent_hash_map_in=*/false),
-        QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false,
-                                      /*use_persistent_hash_map_in=*/true),
-        QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false,
-                                      /*use_persistent_hash_map_in=*/false)));
+    QualifiedIdJoinIndexImplV1Test, QualifiedIdJoinIndexImplV1Test,
+    testing::Values(QualifiedIdJoinIndexImplV1TestParam(
+                        /*pre_mapping_fbv_in=*/true,
+                        /*use_persistent_hash_map_in=*/true),
+                    QualifiedIdJoinIndexImplV1TestParam(
+                        /*pre_mapping_fbv_in=*/true,
+                        /*use_persistent_hash_map_in=*/false),
+                    QualifiedIdJoinIndexImplV1TestParam(
+                        /*pre_mapping_fbv_in=*/false,
+                        /*use_persistent_hash_map_in=*/true),
+                    QualifiedIdJoinIndexImplV1TestParam(
+                        /*pre_mapping_fbv_in=*/false,
+                        /*use_persistent_hash_map_in=*/false)));
 
 }  // namespace
 
diff --git a/icing/join/qualified-id-join-index-impl-v2.cc b/icing/join/qualified-id-join-index-impl-v2.cc
new file mode 100644
index 0000000..70fd13c
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2.cc
@@ -0,0 +1,681 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-accessor.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/encode-util.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Set 1M for max # of qualified id entries and 10 bytes for key-value bytes.
+// This will take at most 23 MiB disk space and mmap for persistent hash map.
+static constexpr int32_t kSchemaJoinableIdToPostingListMapperMaxNumEntries =
+    1 << 20;
+static constexpr int32_t kSchemaJoinableIdToPostingListMapperAverageKVByteSize =
+    10;
+
+// Returns the remapped id, or kInvalidDocumentId if the old id is out of range.
+inline DocumentId GetNewDocumentId(
+    const std::vector<DocumentId>& document_id_old_to_new,
+    DocumentId old_document_id) {
+  return old_document_id < document_id_old_to_new.size()
+             ? document_id_old_to_new[old_document_id]
+             : kInvalidDocumentId;
+}
+
+// Maps namespace_id to its new id; out-of-range ids give kInvalidNamespaceId.
+inline NamespaceId GetNewNamespaceId(
+    const std::vector<NamespaceId>& namespace_id_old_to_new,
+    NamespaceId namespace_id) {
+  return namespace_id < namespace_id_old_to_new.size()
+             ? namespace_id_old_to_new[namespace_id]
+             : kInvalidNamespaceId;
+}
+
+// Fetches the posting list identifier stored for the given encoded key.
+// A missing key is not an error: PostingListIdentifier::kInvalid is returned.
+libtextclassifier3::StatusOr<PostingListIdentifier> GetPostingListIdentifier(
+    const KeyMapper<PostingListIdentifier>&
+        schema_joinable_id_to_posting_list_mapper,
+    const std::string& encoded_schema_type_joinable_property_id_str) {
+  auto lookup_or = schema_joinable_id_to_posting_list_mapper.Get(
+      encoded_schema_type_joinable_property_id_str);
+  if (lookup_or.ok()) {
+    return std::move(lookup_or).ValueOrDie();
+  }
+  if (absl_ports::IsNotFound(lookup_or.status())) {
+    return PostingListIdentifier::kInvalid;
+  }
+  // Any other failure is propagated to the caller unchanged.
+  return lookup_or;
+}
+
+libtextclassifier3::StatusOr<std::string> EncodeSchemaTypeJoinablePropertyId(
+    SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id) {
+  if (schema_type_id < 0) {
+    return absl_ports::InvalidArgumentError("Invalid schema type id");
+  }
+
+  if (!IsJoinablePropertyIdValid(joinable_property_id)) {
+    return absl_ports::InvalidArgumentError("Invalid joinable property id");
+  }
+
+  static constexpr int kEncodedSchemaTypeIdLength = 3;
+
+  // The encoding of schema_type_id is expected to be 1 to 3 bytes long,
+  // depending on its value.
+  std::string encoded_schema_type_id_str =
+      encode_util::EncodeIntToCString(schema_type_id);
+  // Pad encoded_schema_type_id_str to exactly kEncodedSchemaTypeIdLength bytes.
+  while (encoded_schema_type_id_str.size() < kEncodedSchemaTypeIdLength) {
+    // A C string cannot contain a 0 byte, so the padding byte is 1, following
+    // the convention of encode_util::EncodeIntToCString.
+    //
+    // This is safe because the matching decoder turns a byte value of 0x01
+    // back into 0x00. An encoding shorter than 3 bytes means the id has
+    // leading 0x00 bytes that EncodeIntToCString left unencoded, so here we
+    // encode those bytes explicitly as 0x01, giving the schema type id part
+    // a fixed length.
+    encoded_schema_type_id_str.push_back(1);
+  }
+
+  return absl_ports::StrCat(
+      encoded_schema_type_id_str,
+      encode_util::EncodeIntToCString(joinable_property_id));
+}
+
+std::string GetMetadataFilePath(std::string_view working_path) {
+  return absl_ports::StrCat(working_path, "/metadata");
+}
+
+std::string GetSchemaJoinableIdToPostingListMapperPath(
+    std::string_view working_path) {
+  return absl_ports::StrCat(working_path,
+                            "/schema_joinable_id_to_posting_list_mapper");
+}
+
+std::string GetFlashIndexStorageFilePath(std::string_view working_path) {
+  return absl_ports::StrCat(working_path, "/flash_index_storage");
+}
+
+}  // namespace
+
+libtextclassifier3::Status
+QualifiedIdJoinIndexImplV2::JoinDataIterator::Advance() {
+  if (pl_accessor_ == nullptr) {
+    return absl_ports::ResourceExhaustedError("End of iterator");
+  }
+
+  // A cleared flag means the cache still holds data from an earlier fetch:
+  // step to the next cached element and request a refill once the cursor
+  // has moved past the last one.
+  if (!should_retrieve_next_batch_) {
+    ++curr_;
+    if (curr_ >= cached_batch_join_data_.cend()) {
+      should_retrieve_next_batch_ = true;
+    }
+  }
+
+  // Nothing to fetch: the cursor still points inside the cached batch.
+  if (!should_retrieve_next_batch_) {
+    return libtextclassifier3::Status::OK;
+  }
+  // The flag is cleared only after a successful fetch.
+  ICING_RETURN_IF_ERROR(GetNextDataBatch());
+  should_retrieve_next_batch_ = false;
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status
+QualifiedIdJoinIndexImplV2::JoinDataIterator::GetNextDataBatch() {
+  // Pull the next batch from the posting list; log and forward any failure.
+  auto next_batch_or = pl_accessor_->GetNextDataBatch();
+  if (!next_batch_or.ok()) {
+    ICING_LOG(WARNING)
+        << "Fail to get next batch data from posting list due to: "
+        << next_batch_or.status().error_message();
+    return std::move(next_batch_or).status();
+  }
+
+  // Replace the cache and rewind the cursor; an empty batch ends iteration.
+  cached_batch_join_data_ = std::move(next_batch_or).ValueOrDie();
+  curr_ = cached_batch_join_data_.cbegin();
+  if (!cached_batch_join_data_.empty()) {
+    return libtextclassifier3::Status::OK;
+  }
+  return absl_ports::ResourceExhaustedError("End of iterator");
+}
+
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::Create(const Filesystem& filesystem,
+                                   std::string working_path,
+                                   bool pre_mapping_fbv) {
+  if (filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) &&
+      filesystem.DirectoryExists(
+          GetSchemaJoinableIdToPostingListMapperPath(working_path).c_str()) &&
+      filesystem.FileExists(
+          GetFlashIndexStorageFilePath(working_path).c_str())) {
+    return InitializeExistingFiles(filesystem, std::move(working_path),
+                                   pre_mapping_fbv);
+  }
+  // A component is missing: discard leftovers in working_path and start anew.
+  if (filesystem.DirectoryExists(working_path.c_str())) {
+    ICING_RETURN_IF_ERROR(
+        QualifiedIdJoinIndex::Discard(filesystem, working_path));
+  }
+  return InitializeNewFiles(filesystem, std::move(working_path),
+                            pre_mapping_fbv);
+}
+
+QualifiedIdJoinIndexImplV2::~QualifiedIdJoinIndexImplV2() {
+  // A destructor cannot return a status, so a failed flush is only logged.
+  if (PersistToDisk().ok()) return;
+  ICING_LOG(WARNING) << "Failed to persist qualified id join index (v2) to "
+                        "disk while destructing "
+                     << working_path_;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Put(
+    SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
+    DocumentId document_id,
+    std::vector<NamespaceFingerprintIdentifier>&&
+        ref_namespace_fingerprint_ids) {
+  std::sort(ref_namespace_fingerprint_ids.begin(),
+            ref_namespace_fingerprint_ids.end());
+
+  // Drop duplicates; std::unique needs the sorted order established above.
+  auto last = std::unique(ref_namespace_fingerprint_ids.begin(),
+                          ref_namespace_fingerprint_ids.end());
+  ref_namespace_fingerprint_ids.erase(last,
+                                      ref_namespace_fingerprint_ids.end());
+  if (ref_namespace_fingerprint_ids.empty()) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  SetDirty();
+  ICING_ASSIGN_OR_RETURN(
+      std::string encoded_schema_type_joinable_property_id_str,
+      EncodeSchemaTypeJoinablePropertyId(schema_type_id, joinable_property_id));
+
+  ICING_ASSIGN_OR_RETURN(
+      PostingListIdentifier posting_list_identifier,
+      GetPostingListIdentifier(*schema_joinable_id_to_posting_list_mapper_,
+                               encoded_schema_type_joinable_property_id_str));
+  std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor;
+  if (posting_list_identifier.is_valid()) {
+    ICING_ASSIGN_OR_RETURN(
+        pl_accessor,
+        PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+            flash_index_storage_.get(), posting_list_serializer_.get(),
+            posting_list_identifier));
+  } else {
+    ICING_ASSIGN_OR_RETURN(
+        pl_accessor,
+        PostingListJoinDataAccessor<JoinDataType>::Create(
+            flash_index_storage_.get(), posting_list_serializer_.get()));
+  }
+
+  // Prepend one join data entry per referenced id, in the sorted order above.
+  for (const NamespaceFingerprintIdentifier& ref_namespace_fingerprint_id :
+       ref_namespace_fingerprint_ids) {
+    ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
+        DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+            document_id, ref_namespace_fingerprint_id)));
+  }
+
+  // Finalize the posting list and store its resulting id in the mapper.
+  PostingListAccessor::FinalizeResult result =
+      std::move(*pl_accessor).Finalize();
+  if (!result.status.ok()) {
+    return result.status;
+  }
+  if (!result.id.is_valid()) {
+    return absl_ports::InternalError("Fail to flush data into posting list(s)");
+  }
+  ICING_RETURN_IF_ERROR(schema_joinable_id_to_posting_list_mapper_->Put(
+      encoded_schema_type_joinable_property_id_str, result.id));
+
+  // Update info: count the entries added (duplicates were dropped above).
+  info().num_data += ref_namespace_fingerprint_ids.size();
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase>>
+QualifiedIdJoinIndexImplV2::GetIterator(
+    SchemaTypeId schema_type_id,
+    JoinablePropertyId joinable_property_id) const {
+  // Build the mapper key for (schema_type_id, joinable_property_id).
+  ICING_ASSIGN_OR_RETURN(
+      std::string mapper_key,
+      EncodeSchemaTypeJoinablePropertyId(schema_type_id, joinable_property_id));
+  ICING_ASSIGN_OR_RETURN(
+      PostingListIdentifier pl_id,
+      GetPostingListIdentifier(*schema_joinable_id_to_posting_list_mapper_,
+                               mapper_key));
+
+  // No posting list for this key: hand back an iterator with no accessor.
+  if (!pl_id.is_valid()) {
+    return std::make_unique<JoinDataIterator>(nullptr);
+  }
+
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> accessor,
+      PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+          flash_index_storage_.get(), posting_list_serializer_.get(), pl_id));
+  // Wrap the accessor in an iterator over the posting list's join data.
+  return std::make_unique<JoinDataIterator>(std::move(accessor));
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Optimize(
+    const std::vector<DocumentId>& document_id_old_to_new,
+    const std::vector<NamespaceId>& namespace_id_old_to_new,
+    DocumentId new_last_added_document_id) {
+  std::string temp_working_path = working_path_ + "_temp";
+  ICING_RETURN_IF_ERROR(
+      QualifiedIdJoinIndex::Discard(filesystem_, temp_working_path));
+
+  DestructibleDirectory temp_working_path_ddir(&filesystem_,
+                                               std::move(temp_working_path));
+  if (!temp_working_path_ddir.is_valid()) {
+    return absl_ports::InternalError(
+        "Unable to create temp directory to build new qualified id join index "
+        "(v2)");
+  }
+
+  {
+    // Build the new index under the temp directory and transfer the current
+    // data into it. It is persisted and destructed at the end of this scope
+    // so that the directories can be swapped safely afterwards.
+    ICING_ASSIGN_OR_RETURN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> new_index,
+        Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_));
+    ICING_RETURN_IF_ERROR(TransferIndex(
+        document_id_old_to_new, namespace_id_old_to_new, new_index.get()));
+    new_index->set_last_added_document_id(new_last_added_document_id);
+    ICING_RETURN_IF_ERROR(new_index->PersistToDisk());
+  }
+
+  // Release this index's open storage instances before swapping directories.
+  // TODO(b/268521214): handle delete propagation storage
+  schema_joinable_id_to_posting_list_mapper_.reset();
+  flash_index_storage_.reset();
+
+  if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
+                             working_path_.c_str())) {
+    return absl_ports::InternalError(
+        "Unable to apply new qualified id join index (v2) due to failed swap");
+  }
+
+  // Reload metadata, mapper and flash index storage from the swapped-in files.
+  if (!filesystem_.PRead(GetMetadataFilePath(working_path_).c_str(),
+                         metadata_buffer_.get(), kMetadataFileSize,
+                         /*offset=*/0)) {
+    return absl_ports::InternalError("Fail to read metadata file");
+  }
+  ICING_ASSIGN_OR_RETURN(
+      schema_joinable_id_to_posting_list_mapper_,
+      PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+          filesystem_,
+          GetSchemaJoinableIdToPostingListMapperPath(working_path_),
+          pre_mapping_fbv_,
+          /*max_num_entries=*/
+          kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+          /*average_kv_byte_size=*/
+          kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+  ICING_ASSIGN_OR_RETURN(
+      FlashIndexStorage flash_index_storage,
+      FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path_),
+                                &filesystem_, posting_list_serializer_.get()));
+  flash_index_storage_ =
+      std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::Clear() {
+  SetDirty();
+
+  schema_joinable_id_to_posting_list_mapper_.reset();
+  // Delete the mapper's on-disk data, then recreate it at the same path.
+  std::string schema_joinable_id_to_posting_list_mapper_path =
+      GetSchemaJoinableIdToPostingListMapperPath(working_path_);
+  ICING_RETURN_IF_ERROR(
+      PersistentHashMapKeyMapper<PostingListIdentifier>::Delete(
+          filesystem_, schema_joinable_id_to_posting_list_mapper_path));
+  ICING_ASSIGN_OR_RETURN(
+      schema_joinable_id_to_posting_list_mapper_,
+      PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+          filesystem_,
+          std::move(schema_joinable_id_to_posting_list_mapper_path),
+          pre_mapping_fbv_,
+          /*max_num_entries=*/
+          kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+          /*average_kv_byte_size=*/
+          kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+  // Release flash_index_storage, delete its file, then recreate it.
+  flash_index_storage_.reset();
+  if (!filesystem_.DeleteFile(
+          GetFlashIndexStorageFilePath(working_path_).c_str())) {
+    return absl_ports::InternalError("Fail to delete flash index storage file");
+  }
+  ICING_ASSIGN_OR_RETURN(
+      FlashIndexStorage flash_index_storage,
+      FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path_),
+                                &filesystem_, posting_list_serializer_.get()));
+  flash_index_storage_ =
+      std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
+
+  // TODO(b/268521214): clear delete propagation storage
+
+  info().num_data = 0;
+  info().last_added_document_id = kInvalidDocumentId;
+  return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::InitializeNewFiles(const Filesystem& filesystem,
+                                               std::string&& working_path,
+                                               bool pre_mapping_fbv) {
+  // Create the working directory; fail with an internal error otherwise.
+  if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to create directory: ", working_path));
+  }
+
+  // Create the key mapper from encoded ids to posting list identifiers.
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<KeyMapper<PostingListIdentifier>>
+          schema_joinable_id_to_posting_list_mapper,
+      PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+          filesystem, GetSchemaJoinableIdToPostingListMapperPath(working_path),
+          pre_mapping_fbv,
+          /*max_num_entries=*/
+          kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+          /*average_kv_byte_size=*/
+          kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+  // Create the serializer and the flash index storage that uses it.
+  auto posting_list_serializer =
+      std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+  ICING_ASSIGN_OR_RETURN(
+      FlashIndexStorage flash_index_storage,
+      FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+                                &filesystem, posting_list_serializer.get()));
+
+  // Create the instance with a freshly allocated metadata buffer.
+  auto new_join_index = std::unique_ptr<QualifiedIdJoinIndexImplV2>(
+      new QualifiedIdJoinIndexImplV2(
+          filesystem, std::move(working_path),
+          /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize),
+          std::move(schema_joinable_id_to_posting_list_mapper),
+          std::move(posting_list_serializer),
+          std::make_unique<FlashIndexStorage>(std::move(flash_index_storage)),
+          pre_mapping_fbv));
+  // Initialize info: magic set, no data, no document added yet.
+  new_join_index->info().magic = Info::kMagic;
+  new_join_index->info().num_data = 0;
+  new_join_index->info().last_added_document_id = kInvalidDocumentId;
+  // Initialize new PersistentStorage. The initial checksums will be computed
+  // and set via InitializeNewStorage.
+  ICING_RETURN_IF_ERROR(new_join_index->InitializeNewStorage());
+
+  return new_join_index;
+}
+
+/* static */ libtextclassifier3::StatusOr<
+    std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+QualifiedIdJoinIndexImplV2::InitializeExistingFiles(
+    const Filesystem& filesystem, std::string&& working_path,
+    bool pre_mapping_fbv) {
+  // Read kMetadataFileSize bytes of the metadata file into a new buffer.
+  auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize);
+  if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(),
+                        metadata_buffer.get(), kMetadataFileSize,
+                        /*offset=*/0)) {
+    return absl_ports::InternalError("Fail to read metadata file");
+  }
+
+  // Open the key mapper from encoded ids to posting list identifiers.
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<KeyMapper<PostingListIdentifier>>
+          schema_joinable_id_to_posting_list_mapper,
+      PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+          filesystem, GetSchemaJoinableIdToPostingListMapperPath(working_path),
+          pre_mapping_fbv,
+          /*max_num_entries=*/
+          kSchemaJoinableIdToPostingListMapperMaxNumEntries,
+          /*average_kv_byte_size=*/
+          kSchemaJoinableIdToPostingListMapperAverageKVByteSize));
+
+  // Create the serializer and open the flash index storage that uses it.
+  auto posting_list_serializer =
+      std::make_unique<PostingListJoinDataSerializer<JoinDataType>>();
+  ICING_ASSIGN_OR_RETURN(
+      FlashIndexStorage flash_index_storage,
+      FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_path),
+                                &filesystem, posting_list_serializer.get()));
+
+  // Create the instance around the loaded metadata buffer and storages.
+  auto join_index = std::unique_ptr<QualifiedIdJoinIndexImplV2>(
+      new QualifiedIdJoinIndexImplV2(
+          filesystem, std::move(working_path), std::move(metadata_buffer),
+          std::move(schema_joinable_id_to_posting_list_mapper),
+          std::move(posting_list_serializer),
+          std::make_unique<FlashIndexStorage>(std::move(flash_index_storage)),
+          pre_mapping_fbv));
+  // Initialize existing PersistentStorage. Checksums will be validated.
+  ICING_RETURN_IF_ERROR(join_index->InitializeExistingStorage());
+
+  // Reject files whose info magic does not match Info::kMagic.
+  if (join_index->info().magic != Info::kMagic) {
+    return absl_ports::FailedPreconditionError("Incorrect magic value");
+  }
+
+  return join_index;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::TransferIndex(
+    const std::vector<DocumentId>& document_id_old_to_new,
+    const std::vector<NamespaceId>& namespace_id_old_to_new,
+    QualifiedIdJoinIndexImplV2* new_index) const {
+  std::unique_ptr<KeyMapper<PostingListIdentifier>::Iterator> iter =
+      schema_joinable_id_to_posting_list_mapper_->GetIterator();
+
+  // Iterate through every (schema_type_id, joinable_property_id) mapper key.
+  while (iter->Advance()) {
+    PostingListIdentifier old_pl_id = iter->GetValue();
+    if (!old_pl_id.is_valid()) {
+      // No valid posting list is associated with this key; nothing to move.
+      continue;
+    }
+
+    // Read all old join data batch by batch, remapping each entry to its new
+    // document id and new referenced namespace id.
+    std::vector<JoinDataType> new_join_data_vec;
+    ICING_ASSIGN_OR_RETURN(
+        std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>
+            old_pl_accessor,
+        PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
+            flash_index_storage_.get(), posting_list_serializer_.get(),
+            old_pl_id));
+    ICING_ASSIGN_OR_RETURN(std::vector<JoinDataType> batch_old_join_data,
+                           old_pl_accessor->GetNextDataBatch());
+    while (!batch_old_join_data.empty()) {
+      for (const JoinDataType& old_join_data : batch_old_join_data) {
+        DocumentId new_document_id = GetNewDocumentId(
+            document_id_old_to_new, old_join_data.document_id());
+        NamespaceId new_ref_namespace_id = GetNewNamespaceId(
+            namespace_id_old_to_new, old_join_data.join_info().namespace_id());
+
+        // Transfer only if both remapped ids are valid (not deleted/outdated).
+        if (new_document_id != kInvalidDocumentId &&
+            new_ref_namespace_id != kInvalidNamespaceId) {
+          // Reuse the fingerprint from old_join_data as-is, since a document
+          // uri (and therefore its fingerprint) never changes.
+          new_join_data_vec.push_back(JoinDataType(
+              new_document_id, NamespaceFingerprintIdentifier(
+                                   new_ref_namespace_id,
+                                   old_join_data.join_info().fingerprint())));
+        }
+      }
+      ICING_ASSIGN_OR_RETURN(batch_old_join_data,
+                             old_pl_accessor->GetNextDataBatch());
+    }
+
+    if (new_join_data_vec.empty()) {
+      continue;
+    }
+
+    // Namespace ids may be reordered by the remapping, so re-sort the vector.
+    std::sort(new_join_data_vec.begin(), new_join_data_vec.end());
+
+    // Create a new posting list in new_index and prepend all new join data to
+    // it in sorted order.
+    ICING_ASSIGN_OR_RETURN(
+        std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>
+            new_pl_accessor,
+        PostingListJoinDataAccessor<JoinDataType>::Create(
+            new_index->flash_index_storage_.get(),
+            new_index->posting_list_serializer_.get()));
+    for (const JoinDataType& new_join_data : new_join_data_vec) {
+      ICING_RETURN_IF_ERROR(new_pl_accessor->PrependData(new_join_data));
+    }
+
+    // Finalize the posting list and record its id in new_index's mapper.
+    PostingListAccessor::FinalizeResult result =
+        std::move(*new_pl_accessor).Finalize();
+    if (!result.status.ok()) {
+      return result.status;
+    }
+    if (!result.id.is_valid()) {
+      return absl_ports::InternalError(
+          "Fail to flush data into posting list(s)");
+    }
+    ICING_RETURN_IF_ERROR(
+        new_index->schema_joinable_id_to_posting_list_mapper_->Put(
+            iter->GetKey(), result.id));
+
+    // Account for the transferred join data in new_index's info.
+    new_index->info().num_data += new_join_data_vec.size();
+  }
+
+  // TODO(b/268521214): transfer delete propagation storage
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::PersistMetadataToDisk(
+    bool force) {
+  // Skip the write unless forced or either dirty flag is set.
+  const bool needs_flush = force || is_info_dirty() || is_storage_dirty();
+  if (!needs_flush) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Overwrite the metadata file from offset 0 with the in-memory buffer.
+  const std::string path = GetMetadataFilePath(working_path_);
+  ScopedFd metadata_fd(filesystem_.OpenForWrite(path.c_str()));
+  if (!metadata_fd.is_valid()) {
+    return absl_ports::InternalError("Fail to open metadata file for write");
+  }
+  if (!filesystem_.PWrite(metadata_fd.get(), /*offset=*/0,
+                          metadata_buffer_.get(), kMetadataFileSize)) {
+    return absl_ports::InternalError("Fail to write metadata file");
+  }
+
+  if (!filesystem_.DataSync(metadata_fd.get())) {
+    return absl_ports::InternalError("Fail to sync metadata to disk");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status QualifiedIdJoinIndexImplV2::PersistStoragesToDisk(
+    bool force) {
+  if (!(force || is_storage_dirty())) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Flush the key mapper first, then the posting list storage.
+  ICING_RETURN_IF_ERROR(
+      schema_joinable_id_to_posting_list_mapper_->PersistToDisk());
+  if (flash_index_storage_->PersistToDisk()) {
+    return libtextclassifier3::Status::OK;
+  }
+  return absl_ports::InternalError(
+      "Fail to persist FlashIndexStorage to disk");
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV2::ComputeInfoChecksum(bool force) {
+  // Recompute when forced or dirty; otherwise reuse the stored info crc.
+  if (force || is_info_dirty()) {
+    return info().ComputeChecksum();
+  }
+  return Crc32(crcs().component_crcs.info_crc);
+}
+
+libtextclassifier3::StatusOr<Crc32>
+QualifiedIdJoinIndexImplV2::ComputeStoragesChecksum(bool force) {
+  if (!(force || is_storage_dirty())) {
+    return Crc32(crcs().component_crcs.storages_crc);
+  }
+
+  // The storages checksum is built from the key mapper's checksum.
+  ICING_ASSIGN_OR_RETURN(
+      Crc32 mapper_crc,
+      schema_joinable_id_to_posting_list_mapper_->ComputeChecksum());
+  return Crc32(mapper_crc.Get());
+}
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/join/qualified-id-join-index-impl-v2.h b/icing/join/qualified-id-join-index-impl-v2.h
new file mode 100644
index 0000000..2b0bf3f
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2.h
@@ -0,0 +1,369 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/posting-list-join-data-accessor.h"
+#include "icing/join/posting-list-join-data-serializer.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// QualifiedIdJoinIndexImplV2: a class to maintain join data (DocumentId to
+// referenced NamespaceFingerprintIdentifier). It stores join data in posting
+// lists and bucketizes them by (schema_type_id, joinable_property_id).
+class QualifiedIdJoinIndexImplV2 : public QualifiedIdJoinIndex {
+ public:
+  using JoinDataType = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+  class JoinDataIterator : public JoinDataIteratorBase {
+   public:
+    explicit JoinDataIterator(
+        std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor)
+        : pl_accessor_(std::move(pl_accessor)),
+          should_retrieve_next_batch_(true) {}
+
+    ~JoinDataIterator() override = default;
+
+    // Advances to the next data.
+    //
+    // Returns:
+    //   - OK on success
+    //   - RESOURCE_EXHAUSTED_ERROR if reaching the end (i.e. no more relevant
+    //     data)
+    //   - Any other PostingListJoinDataAccessor errors
+    libtextclassifier3::Status Advance() override;
+
+    const JoinDataType& GetCurrent() const override { return *curr_; }
+
+   private:
+    // Gets the next batch of data from the posting list chain, caches it in
+    // cached_batch_join_data_, and sets curr_ to the beginning of the
+    // cache.
+    libtextclassifier3::Status GetNextDataBatch();
+
+    std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor_;
+    std::vector<JoinDataType> cached_batch_join_data_;
+    std::vector<JoinDataType>::const_iterator curr_;
+    bool should_retrieve_next_batch_;
+  };
+
+  struct Info {
+    static constexpr int32_t kMagic = 0x12d1c074;
+
+    int32_t magic;
+    int32_t num_data;
+    DocumentId last_added_document_id;
+
+    Crc32 ComputeChecksum() const {
+      return Crc32(
+          std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
+    }
+  } __attribute__((packed));
+  static_assert(sizeof(Info) == 12, "");
+
+  // Metadata file layout: <Crcs><Info>
+  static constexpr int32_t kCrcsMetadataBufferOffset = 0;
+  static constexpr int32_t kInfoMetadataBufferOffset =
+      static_cast<int32_t>(sizeof(Crcs));
+  static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+  static_assert(kMetadataFileSize == 24, "");
+
+  static constexpr WorkingPathType kWorkingPathType =
+      WorkingPathType::kDirectory;
+
+  // Creates a QualifiedIdJoinIndexImplV2 instance to store join data
+  // (DocumentId to referenced NamespaceFingerprintIdentifier) for future
+  // joining search. If any of the underlying files is missing, then delete the
+  // whole working_path and (re)initialize with new ones. Otherwise initialize
+  // and create the instance from the existing files.
+  //
+  // filesystem: Object to make system level calls
+  // working_path: Specifies the working path for PersistentStorage.
+  //               QualifiedIdJoinIndexImplV2 uses working path as working
+  //               directory and all related files will be stored under this
+  //               directory. It takes full ownership of working_path_,
+  //               including creation/deletion. It is the caller's
+  //               responsibility to specify correct working path and avoid
+  //               mixing different persistent storages together under the same
+  //               path. Also the caller has the ownership for the parent
+  //               directory of working_path_, and it is responsible for parent
+  //               directory creation/deletion. See PersistentStorage for more
+  //               details about the concept of working_path.
+  // pre_mapping_fbv: flag indicating whether to memory map the max possible
+  //                  file size for the underlying FileBackedVector before
+  //                  growing the actual file size.
+  //
+  // Returns:
+  //   - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+  //                               checksum
+  //   - INTERNAL_ERROR on I/O errors
+  //   - Any KeyMapper errors
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+  Create(const Filesystem& filesystem, std::string working_path,
+         bool pre_mapping_fbv);
+
+  // Delete copy and move constructor/assignment operator.
+  QualifiedIdJoinIndexImplV2(const QualifiedIdJoinIndexImplV2&) = delete;
+  QualifiedIdJoinIndexImplV2& operator=(const QualifiedIdJoinIndexImplV2&) =
+      delete;
+
+  QualifiedIdJoinIndexImplV2(QualifiedIdJoinIndexImplV2&&) = delete;
+  QualifiedIdJoinIndexImplV2& operator=(QualifiedIdJoinIndexImplV2&&) = delete;
+
+  ~QualifiedIdJoinIndexImplV2() override;
+
+  // v1 only API. Returns UNIMPLEMENTED_ERROR.
+  libtextclassifier3::Status Put(
+      const DocJoinInfo& doc_join_info,
+      std::string_view ref_qualified_id_str) override {
+    return absl_ports::UnimplementedError("This API is not supported in V2");
+  }
+
+  // v1 only API. Returns UNIMPLEMENTED_ERROR.
+  libtextclassifier3::StatusOr<std::string_view> Get(
+      const DocJoinInfo& doc_join_info) const override {
+    return absl_ports::UnimplementedError("This API is not supported in V2");
+  }
+
+  // Puts a list of referenced (parent) NamespaceFingerprintIdentifiers into
+  // the join index, given the (child) DocumentId, SchemaTypeId and
+  // JoinablePropertyId.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
+  //     document_id is invalid
+  //   - Any KeyMapper/FlashIndexStorage errors
+  libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
+                                 JoinablePropertyId joinable_property_id,
+                                 DocumentId document_id,
+                                 std::vector<NamespaceFingerprintIdentifier>&&
+                                     ref_namespace_fingerprint_ids) override;
+
+  // Returns a JoinDataIterator for iterating through all join data of the
+  // specified (schema_type_id, joinable_property_id).
+  //
+  // Returns:
+  //   - On success: a JoinDataIterator
+  //   - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
+  //     invalid
+  //   - Any KeyMapper/FlashIndexStorage errors
+  libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+  GetIterator(SchemaTypeId schema_type_id,
+              JoinablePropertyId joinable_property_id) const override;
+
+  // Reduces internal file sizes by reclaiming space and ids of deleted
+  // documents. Qualified id join index will convert all entries to the new
+  // document ids and namespace ids.
+  //
+  // - document_id_old_to_new: a map for converting old document id to new
+  //   document id.
+  // - namespace_id_old_to_new: a map for converting old namespace id to new
+  //   namespace id.
+  // - new_last_added_document_id: will be used to update the last added
+  //                               document id in the qualified id join index.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error. This could potentially leave the index in
+  //     an invalid state and the caller should handle it properly (e.g. discard
+  //     and rebuild)
+  libtextclassifier3::Status Optimize(
+      const std::vector<DocumentId>& document_id_old_to_new,
+      const std::vector<NamespaceId>& namespace_id_old_to_new,
+      DocumentId new_last_added_document_id) override;
+
+  // Clears all data and sets last_added_document_id to kInvalidDocumentId.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status Clear() override;
+
+  bool is_v2() const override { return true; }
+
+  int32_t size() const override { return info().num_data; }
+
+  bool empty() const override { return size() == 0; }
+
+  DocumentId last_added_document_id() const override {
+    return info().last_added_document_id;
+  }
+
+  void set_last_added_document_id(DocumentId document_id) override {
+    SetInfoDirty();
+
+    Info& info_ref = info();
+    if (info_ref.last_added_document_id == kInvalidDocumentId ||
+        document_id > info_ref.last_added_document_id) {
+      info_ref.last_added_document_id = document_id;
+    }
+  }
+
+ private:
+  explicit QualifiedIdJoinIndexImplV2(
+      const Filesystem& filesystem, std::string&& working_path,
+      std::unique_ptr<uint8_t[]> metadata_buffer,
+      std::unique_ptr<KeyMapper<PostingListIdentifier>>
+          schema_joinable_id_to_posting_list_mapper,
+      std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>>
+          posting_list_serializer,
+      std::unique_ptr<FlashIndexStorage> flash_index_storage,
+      bool pre_mapping_fbv)
+      : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
+        metadata_buffer_(std::move(metadata_buffer)),
+        schema_joinable_id_to_posting_list_mapper_(
+            std::move(schema_joinable_id_to_posting_list_mapper)),
+        posting_list_serializer_(std::move(posting_list_serializer)),
+        flash_index_storage_(std::move(flash_index_storage)),
+        pre_mapping_fbv_(pre_mapping_fbv),
+        is_info_dirty_(false),
+        is_storage_dirty_(false) {}
+
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+  InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+                     bool pre_mapping_fbv);
+
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<QualifiedIdJoinIndexImplV2>>
+  InitializeExistingFiles(const Filesystem& filesystem,
+                          std::string&& working_path, bool pre_mapping_fbv);
+
+  // Transfers qualified id join index data from the current index to new_index
+  // and converts to new document ids and namespace ids according to
+  // document_id_old_to_new and namespace_id_old_to_new. Helper for Optimize.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status TransferIndex(
+      const std::vector<DocumentId>& document_id_old_to_new,
+      const std::vector<NamespaceId>& namespace_id_old_to_new,
+      QualifiedIdJoinIndexImplV2* new_index) const;
+
+  // Flushes contents of metadata file.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
+
+  // Flushes contents of all storages to underlying files.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INTERNAL_ERROR on I/O error
+  libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
+
+  // Computes and returns Info checksum.
+  //
+  // Returns:
+  //   - Crc of the Info on success
+  libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
+
+  // Computes and returns all storages checksum.
+  //
+  // Returns:
+  //   - Crc of all storages on success
+  //   - INTERNAL_ERROR if any data inconsistency
+  libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+      bool force) override;
+
+  Crcs& crcs() override {
+    return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
+                                    kCrcsMetadataBufferOffset);
+  }
+
+  const Crcs& crcs() const override {
+    return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
+                                          kCrcsMetadataBufferOffset);
+  }
+
+  Info& info() {
+    return *reinterpret_cast<Info*>(metadata_buffer_.get() +
+                                    kInfoMetadataBufferOffset);
+  }
+
+  const Info& info() const {
+    return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
+                                          kInfoMetadataBufferOffset);
+  }
+
+  void SetInfoDirty() { is_info_dirty_ = true; }
+  // When storage is dirty, we have to set info dirty as well. So just expose
+  // SetDirty to set both.
+  void SetDirty() {
+    is_info_dirty_ = true;
+    is_storage_dirty_ = true;
+  }
+
+  bool is_info_dirty() const { return is_info_dirty_; }
+  bool is_storage_dirty() const { return is_storage_dirty_; }
+
+  // Metadata buffer
+  std::unique_ptr<uint8_t[]> metadata_buffer_;
+
+  // Persistent KeyMapper for mapping (schema_type_id, joinable_property_id) to
+  // PostingListIdentifier.
+  std::unique_ptr<KeyMapper<PostingListIdentifier>>
+      schema_joinable_id_to_posting_list_mapper_;
+
+  // Posting list related members. Use posting list to store join data
+  // (document id to referenced NamespaceFingerprintIdentifier).
+  std::unique_ptr<PostingListJoinDataSerializer<JoinDataType>>
+      posting_list_serializer_;
+  std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+
+  // TODO(b/268521214): add delete propagation storage
+
+  // Flag indicating whether to memory map the max possible file size for the
+  // underlying FileBackedVector before growing the actual file size.
+  bool pre_mapping_fbv_;
+
+  bool is_info_dirty_;
+  bool is_storage_dirty_;
+};
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V2_H_
diff --git a/icing/join/qualified-id-join-index-impl-v2_test.cc b/icing/join/qualified-id-join-index-impl-v2_test.cc
new file mode 100644
index 0000000..d73d6c2
--- /dev/null
+++ b/icing/join/qualified-id-join-index-impl-v2_test.cc
@@ -0,0 +1,1414 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/join/qualified-id-join-index-impl-v2.h"
+
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/persistent-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Ne;
+using ::testing::Not;
+using ::testing::Pointee;
+using ::testing::SizeIs;
+
+using Crcs = PersistentStorage::Crcs;
+using Info = QualifiedIdJoinIndexImplV2::Info;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+
+class QualifiedIdJoinIndexImplV2Test : public ::testing::Test {
+ protected:
+  // Creates the base directory and derives the index working path from it.
+  void SetUp() override {
+    base_dir_ = GetTestTempDir() + "/icing";
+    ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()));
+    working_path_ = base_dir_ + "/qualified_id_join_index_impl_v2_test";
+  }
+
+  // Removes the base directory together with everything under it.
+  void TearDown() override {
+    filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+  }
+
+  Filesystem filesystem_;
+  std::string base_dir_;
+  std::string working_path_;
+};
+
+libtextclassifier3::StatusOr<
+    std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
+GetJoinData(const QualifiedIdJoinIndexImplV2& index,
+            SchemaTypeId schema_type_id,
+            JoinablePropertyId joinable_property_id) {
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> join_data_it,
+      index.GetIterator(schema_type_id, joinable_property_id));
+
+  // Drain the iterator; stop at the first non-OK status from Advance().
+  std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> collected;
+  while (join_data_it->Advance().ok()) {
+    collected.push_back(join_data_it->GetCurrent());
+  }
+  return collected;
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidWorkingPath) {
+  auto result = QualifiedIdJoinIndexImplV2::Create(
+      filesystem_, "/dev/null/qualified_id_join_index_impl_v2_test",
+      /*pre_mapping_fbv=*/false);
+  EXPECT_THAT(result, StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InitializeNewFiles) {
+  {
+    // Create a new qualified id join index on a not-yet-existing directory.
+    ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    EXPECT_THAT(index, Pointee(IsEmpty()));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // The metadata file should be initialized correctly for both the info and
+  // crcs sections.
+  const std::string metadata_file_path =
+      absl_ports::StrCat(working_path_, "/metadata");
+  auto metadata_buffer = std::make_unique<uint8_t[]>(
+      QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+  ASSERT_THAT(
+      filesystem_.PRead(metadata_file_path.c_str(), metadata_buffer.get(),
+                        QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                        /*offset=*/0),
+      IsTrue());
+
+  // Check the info section: correct magic, no data, no document added yet.
+  const Info* info = reinterpret_cast<const Info*>(
+      metadata_buffer.get() +
+      QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+  EXPECT_THAT(info->magic, Eq(Info::kMagic));
+  EXPECT_THAT(info->num_data, Eq(0));
+  EXPECT_THAT(info->last_added_document_id, Eq(kInvalidDocumentId));
+
+  // Check the crcs section against checksums recomputed from the bytes read.
+  const Crcs* crcs = reinterpret_cast<const Crcs*>(
+      metadata_buffer.get() +
+      QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+  // There is some initial info in KeyMapper, so storages_crc should be
+  // non-zero.
+  EXPECT_THAT(crcs->component_crcs.storages_crc, Ne(0));
+  EXPECT_THAT(crcs->component_crcs.info_crc,
+              Eq(Crc32(std::string_view(reinterpret_cast<const char*>(info),
+                                        sizeof(Info)))
+                     .Get()));
+  EXPECT_THAT(crcs->all_crc,
+              Eq(Crc32(std::string_view(
+                           reinterpret_cast<const char*>(&crcs->component_crcs),
+                           sizeof(Crcs::ComponentCrcs)))
+                     .Get()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+  // Create a new qualified id join index.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  // Insert some data, calling PersistToDisk only after the first Put.
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+      /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+  ICING_ASSERT_OK(index->PersistToDisk());
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+      /*ref_namespace_fingerprint_ids=*/{id3}));
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+      /*ref_namespace_fingerprint_ids=*/{id4}));
+
+  // The last two Puts were not followed by PersistToDisk, so checksums were not
+  // recomputed or synced; initializing another instance on the files must fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializationShouldSucceedWithPersistToDisk) {
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+  // Start from a brand-new index.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> original_index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  // Add four join data entries spread over two (schema type, property) keys.
+  ICING_ASSERT_OK(original_index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+      /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+  ICING_ASSERT_OK(original_index->Put(
+      /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+      /*ref_namespace_fingerprint_ids=*/{id3}));
+  ICING_ASSERT_OK(original_index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+      /*ref_namespace_fingerprint_ids=*/{id4}));
+  ASSERT_THAT(original_index, Pointee(SizeIs(4)));
+
+  // PersistToDisk should recompute all checksums and sync them to disk, so a
+  // second instance created on the same files must initialize successfully
+  // and expose the same contents.
+  ICING_EXPECT_OK(original_index->PersistToDisk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> reopened_index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(reopened_index, Pointee(SizeIs(4)));
+  EXPECT_THAT(
+      GetJoinData(*reopened_index, /*schema_type_id=*/2,
+                  /*joinable_property_id=*/1),
+      IsOkAndHolds(
+          ElementsAre(JoinData(/*document_id=*/12, /*join_info=*/id4),
+                      JoinData(/*document_id=*/5, /*join_info=*/id2),
+                      JoinData(/*document_id=*/5, /*join_info=*/id1))));
+  EXPECT_THAT(
+      GetJoinData(*reopened_index, /*schema_type_id=*/3,
+                  /*joinable_property_id=*/10),
+      IsOkAndHolds(
+          ElementsAre(JoinData(/*document_id=*/6, /*join_info=*/id3))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializationShouldSucceedAfterDestruction) {
+  using JoinData = DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+  {
+    // First instance: created on new files, populated, and then destroyed at
+    // the end of this scope without an explicit PersistToDisk call.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> first_index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+
+    ICING_ASSERT_OK(first_index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+    ICING_ASSERT_OK(first_index->Put(
+        /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+        /*ref_namespace_fingerprint_ids=*/{id3}));
+    ICING_ASSERT_OK(first_index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+        /*ref_namespace_fingerprint_ids=*/{id4}));
+    ASSERT_THAT(first_index, Pointee(SizeIs(4)));
+  }
+
+  {
+    // Second instance on the same files. The destructor of the first instance
+    // is expected to have persisted everything, so initialization should
+    // succeed and the contents should be unchanged.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> second_index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    EXPECT_THAT(second_index, Pointee(SizeIs(4)));
+
+    EXPECT_THAT(
+        GetJoinData(*second_index, /*schema_type_id=*/2,
+                    /*joinable_property_id=*/1),
+        IsOkAndHolds(
+            ElementsAre(JoinData(/*document_id=*/12, /*join_info=*/id4),
+                        JoinData(/*document_id=*/5, /*join_info=*/id2),
+                        JoinData(/*document_id=*/5, /*join_info=*/id1))));
+    EXPECT_THAT(
+        GetJoinData(*second_index, /*schema_type_id=*/3,
+                    /*joinable_property_id=*/10),
+        IsOkAndHolds(
+            ElementsAre(JoinData(/*document_id=*/6, /*join_info=*/id3))));
+  }
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializeExistingFilesWithDifferentMagicShouldFail) {
+  {
+    // Create a new qualified id join index, add one entry and persist it.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  {
+    const std::string metadata_file_path =
+        absl_ports::StrCat(working_path_, "/metadata");
+    ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+    ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+    ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+                                  QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                                  /*offset=*/0),
+                IsTrue());
+
+    // Manually change magic, then recompute info_crc and all_crc to match.
+    Crcs* crcs = reinterpret_cast<Crcs*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+    Info* info = reinterpret_cast<Info*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+    info->magic += kCorruptedValueOffset;
+    crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+    crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+                IsTrue());
+  }
+
+  // Attempt to create the qualified id join index with different magic. Since
+  // the CRCs were updated, it should fail on the magic check.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Incorrect magic value")));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializeExistingFilesWithWrongAllCrcShouldFail) {
+  {
+    // Create a new qualified id join index, add one entry and persist it.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  {
+    const std::string metadata_file_path =
+        absl_ports::StrCat(working_path_, "/metadata");
+    ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+    ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+    ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+                                  QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                                  /*offset=*/0),
+                IsTrue());
+
+    // Manually corrupt all_crc only; info and component crcs stay untouched.
+    Crcs* crcs = reinterpret_cast<Crcs*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kCrcsMetadataBufferOffset);
+    crcs->all_crc += kCorruptedValueOffset;
+
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+                IsTrue());
+  }
+
+  // Attempt to create the qualified id join index with metadata containing
+  // corrupted all_crc. This should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Invalid all crc")));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       InitializeExistingFilesWithCorruptedInfoShouldFail) {
+  {
+    // Create a new qualified id join index, add one entry and persist it.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  {
+    const std::string metadata_file_path =
+        absl_ports::StrCat(working_path_, "/metadata");
+    ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+    ASSERT_THAT(metadata_sfd.is_valid(), IsTrue());
+
+    auto metadata_buffer = std::make_unique<uint8_t[]>(
+        QualifiedIdJoinIndexImplV2::kMetadataFileSize);
+    ASSERT_THAT(filesystem_.PRead(metadata_sfd.get(), metadata_buffer.get(),
+                                  QualifiedIdJoinIndexImplV2::kMetadataFileSize,
+                                  /*offset=*/0),
+                IsTrue());
+
+    // Modify a field of info without updating any checksum. The stored
+    // info_crc no longer matches, which mimics on-disk corruption of info.
+    Info* info = reinterpret_cast<Info*>(
+        metadata_buffer.get() +
+        QualifiedIdJoinIndexImplV2::kInfoMetadataBufferOffset);
+    info->last_added_document_id += kCorruptedValueOffset;
+
+    ASSERT_THAT(filesystem_.PWrite(
+                    metadata_sfd.get(), /*offset=*/0, metadata_buffer.get(),
+                    QualifiedIdJoinIndexImplV2::kMetadataFileSize),
+                IsTrue());
+  }
+
+  // Attempt to create the qualified id join index with info that doesn't match
+  // its checksum. This should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Invalid info crc")));
+}
+
+TEST_F(
+    QualifiedIdJoinIndexImplV2Test,
+    InitializeExistingFilesWithCorruptedSchemaJoinableIdToPostingListMapperShouldFail) {
+  {
+    // Create a new qualified id join index, add one entry and persist it.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK(index->Put(
+        /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+        /*ref_namespace_fingerprint_ids=*/
+        {NamespaceFingerprintIdentifier(/*namespace_id=*/1,
+                                        /*fingerprint=*/12)}));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // Corrupt schema_joinable_id_to_posting_list_mapper manually.
+  {
+    std::string mapper_working_path = absl_ports::StrCat(
+        working_path_, "/schema_joinable_id_to_posting_list_mapper");
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<KeyMapper<PostingListIdentifier>> mapper,
+        PersistentHashMapKeyMapper<PostingListIdentifier>::Create(
+            filesystem_, std::move(mapper_working_path),
+            /*pre_mapping_fbv=*/false));
+    ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum());
+    ICING_ASSERT_OK(mapper->Put("foo", PostingListIdentifier::kInvalid));
+    ICING_ASSERT_OK(mapper->PersistToDisk());
+    ICING_ASSERT_OK_AND_ASSIGN(Crc32 new_crc, mapper->ComputeChecksum());
+    ASSERT_THAT(old_crc, Not(Eq(new_crc)));
+  }
+
+  // Attempt to create the qualified id join index with corrupted
+  // schema_joinable_id_to_posting_list_mapper. This should fail.
+  EXPECT_THAT(QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                 /*pre_mapping_fbv=*/false),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+                       HasSubstr("Invalid storages crc")));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidPut) {
+  NamespaceFingerprintIdentifier id(/*namespace_id=*/1, /*fingerprint=*/12);
+
+  // Create a new index. Each Put below passes exactly one invalid argument.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(
+      index->Put(/*schema_type_id=*/-1, /*joinable_property_id=*/1,
+                 /*document_id=*/5, /*ref_namespace_fingerprint_ids=*/{id}),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(
+      index->Put(/*schema_type_id=*/2, /*joinable_property_id=*/-1,
+                 /*document_id=*/5, /*ref_namespace_fingerprint_ids=*/{id}),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(index->Put(/*schema_type_id=*/2, /*joinable_property_id=*/1,
+                         /*document_id=*/kInvalidDocumentId,
+                         /*ref_namespace_fingerprint_ids=*/{id}),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, InvalidGetIterator) {
+  // Create a new index. Each GetIterator below passes one negative id.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(
+      index->GetIterator(/*schema_type_id=*/-1, /*joinable_property_id=*/1),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(
+      index->GetIterator(/*schema_type_id=*/2, /*joinable_property_id=*/-1),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       PutEmptyRefNamespaceFingerprintIdsShouldReturnOk) {
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  // Create a new qualified id join index. Putting no ids should add nothing.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{}),
+      IsOk());
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       PutAndGetSingleSchemaTypeAndJoinableProperty) {
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/78);
+
+  {
+    // Create new qualified id join index and put 4 ids across 3 documents.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+
+    EXPECT_THAT(
+        index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                   /*ref_namespace_fingerprint_ids=*/{id2, id1}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id, joinable_property_id, /*document_id=*/6,
+                   /*ref_namespace_fingerprint_ids=*/{id3}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id, joinable_property_id, /*document_id=*/12,
+                   /*ref_namespace_fingerprint_ids=*/{id4}),
+        IsOk());
+    EXPECT_THAT(index, Pointee(SizeIs(4)));  // 2 + 1 + 1 ids were put.
+
+    EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/12, /*join_info=*/id4),
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/6, /*join_info=*/id3),
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id1),
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id2))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+                IsOkAndHolds(IsEmpty()));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+                IsOkAndHolds(IsEmpty()));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // Verify we can get all of them after destructing and re-initializing.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/12, /*join_info=*/id4),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/6, /*join_info=*/id3),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id1),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id2))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id + 1, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id + 1),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test,
+       PutAndGetMultipleSchemaTypesAndJoinableProperties) {
+  SchemaTypeId schema_type_id1 = 2;
+  SchemaTypeId schema_type_id2 = 4;
+
+  JoinablePropertyId joinable_property_id1 = 1;
+  JoinablePropertyId joinable_property_id2 = 10;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/78);
+
+  {
+    // Create a new index and put 1 id per (schema type, property) pair.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+        QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                           /*pre_mapping_fbv=*/false));
+
+    EXPECT_THAT(
+        index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/5,
+                   /*ref_namespace_fingerprint_ids=*/{id1}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id1, joinable_property_id2, /*document_id=*/5,
+                   /*ref_namespace_fingerprint_ids=*/{id2}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id2, joinable_property_id1, /*document_id=*/12,
+                   /*ref_namespace_fingerprint_ids=*/{id3}),
+        IsOk());
+    EXPECT_THAT(
+        index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/12,
+                   /*ref_namespace_fingerprint_ids=*/{id4}),
+        IsOk());
+    EXPECT_THAT(index, Pointee(SizeIs(4)));
+
+    EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id1),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id1))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id2),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/5, /*join_info=*/id2))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id1),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/12, /*join_info=*/id3))));
+    EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id2),
+                IsOkAndHolds(ElementsAre(
+                    DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                        /*document_id=*/12, /*join_info=*/id4))));
+
+    ICING_ASSERT_OK(index->PersistToDisk());
+  }
+
+  // Verify we can get all of them after destructing and re-initializing.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id1),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id1))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id1, joinable_property_id2),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/5, /*join_info=*/id2))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id1),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/12, /*join_info=*/id3))));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id2, joinable_property_id2),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/12, /*join_info=*/id4))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, SetLastAddedDocumentId) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  constexpr DocumentId kDocumentId = 100;
+  index->set_last_added_document_id(kDocumentId);  // Replaces the invalid id.
+  EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+  constexpr DocumentId kNextDocumentId = 123;  // Greater than kDocumentId.
+  index->set_last_added_document_id(kNextDocumentId);
+  EXPECT_THAT(index->last_added_document_id(), Eq(kNextDocumentId));
+}
+
+TEST_F(
+    QualifiedIdJoinIndexImplV2Test,
+    SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  constexpr DocumentId kDocumentId = 123;
+  index->set_last_added_document_id(kDocumentId);
+  ASSERT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+
+  constexpr DocumentId kNextDocumentId = 100;
+  ASSERT_THAT(kNextDocumentId, Lt(kDocumentId));
+  index->set_last_added_document_id(kNextDocumentId);
+  // The smaller id should be ignored: last_added_document_id() is unchanged.
+  EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, Optimize) {
+  // General test for Optimize(): remaps both document ids and namespace ids.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id1 = 2;
+  SchemaTypeId schema_type_id2 = 5;
+
+  JoinablePropertyId joinable_property_id1 = 11;
+  JoinablePropertyId joinable_property_id2 = 15;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/2, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/3, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/4, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/104);
+  NamespaceFingerprintIdentifier id5(/*namespace_id=*/0, /*fingerprint=*/105);
+  NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+  NamespaceFingerprintIdentifier id7(/*namespace_id=*/3, /*fingerprint=*/107);
+  NamespaceFingerprintIdentifier id8(/*namespace_id=*/2, /*fingerprint=*/108);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/3,
+                 /*ref_namespace_fingerprint_ids=*/{id1, id2, id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id2, joinable_property_id2, /*document_id=*/8,
+                 /*ref_namespace_fingerprint_ids=*/{id5, id6}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/13,
+                 /*ref_namespace_fingerprint_ids=*/{id7}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/21,
+                 /*ref_namespace_fingerprint_ids=*/{id8}),
+      IsOk());
+  index->set_last_added_document_id(21);
+
+  ASSERT_THAT(index, Pointee(SizeIs(8)));
+
+  // Delete doc id = 5, 13, compress and keep the rest.
+  std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+  document_id_old_to_new[3] = 0;
+  document_id_old_to_new[8] = 1;
+  document_id_old_to_new[21] = 2;
+
+  // Delete namespace id 1, 2 (and invalidate id1, id6, id8). Reorder namespace
+  // ids [0, 3, 4] to [1, 2, 0].
+  std::vector<NamespaceId> namespace_id_old_to_new(5, kInvalidNamespaceId);
+  namespace_id_old_to_new[0] = 1;
+  namespace_id_old_to_new[3] = 2;
+  namespace_id_old_to_new[4] = 0;
+
+  DocumentId new_last_added_document_id = 2;
+  EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                              new_last_added_document_id),
+              IsOk());
+  EXPECT_THAT(index, Pointee(SizeIs(3)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+  // Verify GetIterator API should work normally after Optimize().
+  // 1) schema_type_id1, joinable_property_id1:
+  //   - old_doc_id=21, old_ref_namespace_id=2: NOT FOUND
+  //   - old_doc_id=13, old_ref_namespace_id=3: NOT FOUND
+  //   - old_doc_id=3, old_ref_namespace_id=4:
+  //     become new_doc_id=0, new_ref_namespace_id=0
+  //   - old_doc_id=3, old_ref_namespace_id=3:
+  //     become new_doc_id=0, new_ref_namespace_id=2
+  //   - old_doc_id=3, old_ref_namespace_id=2: NOT FOUND
+  //
+  // For new_doc_id=0, it should reorder due to posting list restriction.
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id1, joinable_property_id1),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/102)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/103)))));
+
+  // 2) schema_type_id2, joinable_property_id2:
+  //   - old_doc_id=8, old_ref_namespace_id=1: NOT FOUND
+  //   - old_doc_id=8, old_ref_namespace_id=0:
+  //     become new_doc_id=1, new_ref_namespace_id=1
+  //   - old_doc_id=5, old_ref_namespace_id=0: NOT FOUND
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id2, joinable_property_id2),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/1, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/1, /*fingerprint=*/105)))));
+
+  // Verify Put API should work normally after Optimize().
+  NamespaceFingerprintIdentifier id9(/*namespace_id=*/1, /*fingerprint=*/109);
+  EXPECT_THAT(
+      index->Put(schema_type_id1, joinable_property_id1, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id9}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(99));
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id1, joinable_property_id1),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/99, /*join_info=*/id9),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/102)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/103)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDocumentIdChange) {
+  // Specific test for Optimize(): document id compaction.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+  NamespaceFingerprintIdentifier id5(/*namespace_id=*/1, /*fingerprint=*/105);
+  NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+                 /*ref_namespace_fingerprint_ids=*/{id1, id2}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+                 /*ref_namespace_fingerprint_ids=*/{id5}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+                 /*ref_namespace_fingerprint_ids=*/{id6}),
+      IsOk());
+  index->set_last_added_document_id(21);
+
+  ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+  // Delete doc id = 5, 8, compress and keep the rest.
+  std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+  document_id_old_to_new[3] = 0;
+  document_id_old_to_new[13] = 1;
+  document_id_old_to_new[21] = 2;
+
+  // No change for namespace id: {0, 1} maps each id to itself.
+  std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+  DocumentId new_last_added_document_id = 2;
+  EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                              new_last_added_document_id),
+              IsOk());
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+  // Verify GetIterator API should work normally after Optimize().
+  // - old_doc_id=21, join_info=id6: become doc_id=2, join_info=id6
+  // - old_doc_id=13, join_info=id5: become doc_id=1, join_info=id5
+  // - old_doc_id=8, join_info=id4: NOT FOUND
+  // - old_doc_id=5, join_info=id3: NOT FOUND
+  // - old_doc_id=3, join_info=id2: become doc_id=0, join_info=id2
+  // - old_doc_id=3, join_info=id1: become doc_id=0, join_info=id1
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/2, /*join_info=*/id6),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/1, /*join_info=*/id5),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id2),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id1))));
+
+  // Verify Put API should work normally after Optimize().
+  NamespaceFingerprintIdentifier id7(/*namespace_id=*/1, /*fingerprint=*/107);
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id7}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  EXPECT_THAT(index, Pointee(SizeIs(5)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(99));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(ElementsAre(
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/99, /*join_info=*/id7),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/2, /*join_info=*/id6),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/1, /*join_info=*/id5),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id2),
+                  DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                      /*document_id=*/0, /*join_info=*/id1))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeOutOfRangeDocumentId) {
+  // Optimize() must tolerate a document id with no slot in the id mapping.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+  NamespaceFingerprintIdentifier id(/*namespace_id=*/1, /*fingerprint=*/101);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  // document_id_old_to_new has only one entry, so the stored document id 99 is
+  // out of its range. Optimize() should handle that without failing.
+  std::vector<DocumentId> document_id_old_to_new = {kInvalidDocumentId};
+  std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+  // The out-of-range vector index must not surface as an error.
+  EXPECT_THAT(
+      index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                      /*new_last_added_document_id=*/kInvalidDocumentId),
+      IsOk());
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  // The entry for document id 99 is discarded, leaving the index empty.
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDeleteAllDocuments) {
+  // Optimize() with every document id mapped to kInvalidDocumentId.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+  NamespaceFingerprintIdentifier id5(/*namespace_id=*/1, /*fingerprint=*/105);
+  NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+                 /*ref_namespace_fingerprint_ids=*/{id1, id2}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+                 /*ref_namespace_fingerprint_ids=*/{id5}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+                 /*ref_namespace_fingerprint_ids=*/{id6}),
+      IsOk());
+  index->set_last_added_document_id(21);
+
+  ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+  // Map all 22 old document ids (0..21) to kInvalidDocumentId, i.e. deleted.
+  std::vector<DocumentId> document_id_old_to_new(22, kInvalidDocumentId);
+
+  // Namespace ids are left unchanged (identity mapping for ids 0 and 1).
+  std::vector<NamespaceId> namespace_id_old_to_new = {0, 1};
+
+  EXPECT_THAT(
+      index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                      /*new_last_added_document_id=*/kInvalidDocumentId),
+      IsOk());
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  // Every entry belonged to a deleted document, so nothing should remain.
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeNamespaceIdChange) {
+  // Optimize() with referenced namespace ids deleted and renumbered.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/3, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/5, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/4, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/0, /*fingerprint=*/104);
+  NamespaceFingerprintIdentifier id5(/*namespace_id=*/2, /*fingerprint=*/105);
+  NamespaceFingerprintIdentifier id6(/*namespace_id=*/1, /*fingerprint=*/106);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/2,
+                 /*ref_namespace_fingerprint_ids=*/{id1}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/3,
+                 /*ref_namespace_fingerprint_ids=*/{id2}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/5,
+                 /*ref_namespace_fingerprint_ids=*/{id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/8,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/13,
+                 /*ref_namespace_fingerprint_ids=*/{id5}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/21,
+                 /*ref_namespace_fingerprint_ids=*/{id6}),
+      IsOk());
+  index->set_last_added_document_id(21);
+
+  ASSERT_THAT(index, Pointee(SizeIs(6)));
+
+  // Identity mapping: document ids 0..21 keep their values.
+  std::vector<DocumentId> document_id_old_to_new(22);
+  std::iota(document_id_old_to_new.begin(), document_id_old_to_new.end(), 0);
+
+  // Namespace ids 2 and 4 are deleted; the surviving ids [0, 1, 3, 5] map to
+  // [2, 3, 1, 0] respectively.
+  std::vector<NamespaceId> namespace_id_old_to_new(6, kInvalidNamespaceId);
+  namespace_id_old_to_new[0] = 2;
+  namespace_id_old_to_new[1] = 3;
+  namespace_id_old_to_new[3] = 1;
+  namespace_id_old_to_new[5] = 0;
+
+  DocumentId new_last_added_document_id = 21;
+  EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                              new_last_added_document_id),
+              IsOk());
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+  // GetIterator API should still work after Optimize(). Per original entry:
+  // - id6 (old_namespace_id=1): new_namespace_id=3 (document_id = 21)
+  // - id5 (old_namespace_id=2): NOT FOUND
+  // - id4 (old_namespace_id=0): new_namespace_id=2 (document_id = 8)
+  // - id3 (old_namespace_id=4): NOT FOUND
+  // - id2 (old_namespace_id=5): new_namespace_id=0 (document_id = 3)
+  // - id1 (old_namespace_id=3): new_namespace_id=1 (document_id = 2)
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id, joinable_property_id),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/21, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/3, /*fingerprint=*/106)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/8, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/104)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/3, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/102)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/2, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/1, /*fingerprint=*/101)))));
+
+  // Put API should still work after Optimize().
+  NamespaceFingerprintIdentifier id7(/*namespace_id=*/1, /*fingerprint=*/107);
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/99,
+                 /*ref_namespace_fingerprint_ids=*/{id7}),
+      IsOk());
+  index->set_last_added_document_id(99);
+
+  EXPECT_THAT(index, Pointee(SizeIs(5)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(99));
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id, joinable_property_id),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/99, /*join_info=*/id7),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/21, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/3, /*fingerprint=*/106)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/8, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/104)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/3, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/102)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/2, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/1, /*fingerprint=*/101)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeNamespaceIdChangeShouldReorder) {
+  // Optimize() where renumbered namespace ids change the order of join data.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/0, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/103);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/104);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+                 /*ref_namespace_fingerprint_ids=*/{id1, id2, id3}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/1,
+                 /*ref_namespace_fingerprint_ids=*/{id4}),
+      IsOk());
+  index->set_last_added_document_id(1);
+
+  ASSERT_THAT(index, Pointee(SizeIs(4)));
+
+  // Identity mapping: document ids 0 and 1 keep their values.
+  std::vector<DocumentId> document_id_old_to_new = {0, 1};
+
+  // Namespace ids [0, 1, 2] map to [2, 0, 1] respectively; none is deleted.
+  std::vector<NamespaceId> namespace_id_old_to_new = {2, 0, 1};
+
+  DocumentId new_last_added_document_id = 1;
+  EXPECT_THAT(index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                              new_last_added_document_id),
+              IsOk());
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(new_last_added_document_id));
+
+  // GetIterator API should still work after Optimize(). Per original entry:
+  // - id4 (old_namespace_id=1): new_namespace_id=0 (document_id = 1)
+  // - id3 (old_namespace_id=2): new_namespace_id=1 (document_id = 0)
+  // - id2 (old_namespace_id=1): new_namespace_id=0 (document_id = 0)
+  // - id1 (old_namespace_id=0): new_namespace_id=2 (document_id = 0)
+  //
+  // Should reorder to [id4, id1, id3, id2] due to posting list restriction.
+  EXPECT_THAT(
+      GetJoinData(*index, schema_type_id, joinable_property_id),
+      IsOkAndHolds(ElementsAre(
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/1, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/104)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/2, /*fingerprint=*/101)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/1, /*fingerprint=*/103)),
+          DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/0, /*join_info=*/NamespaceFingerprintIdentifier(
+                  /*namespace_id=*/0, /*fingerprint=*/102)))));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeOutOfRangeNamespaceId) {
+  // Optimize() must tolerate a namespace id with no slot in the id mapping.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+  NamespaceFingerprintIdentifier id(/*namespace_id=*/99, /*fingerprint=*/101);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+                 /*ref_namespace_fingerprint_ids=*/{id}),
+      IsOk());
+  index->set_last_added_document_id(0);
+
+  // namespace_id_old_to_new has only one entry, so the referenced namespace id
+  // 99 is out of its range. Optimize() should handle that without failing.
+  std::vector<DocumentId> document_id_old_to_new = {0};
+  std::vector<NamespaceId> namespace_id_old_to_new = {kInvalidNamespaceId};
+
+  // The out-of-range vector index must not surface as an error.
+  EXPECT_THAT(
+      index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                      /*new_last_added_document_id=*/kInvalidDocumentId),
+      IsOk());
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  // The entry referencing namespace id 99 is discarded; the index is empty.
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, OptimizeDeleteAllNamespaces) {
+  // Optimize() with every referenced namespace id mapped to invalid.
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+
+  SchemaTypeId schema_type_id = 2;
+  JoinablePropertyId joinable_property_id = 1;
+
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/0, /*fingerprint=*/101);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/102);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/2, /*fingerprint=*/103);
+
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/0,
+                 /*ref_namespace_fingerprint_ids=*/{id1}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/1,
+                 /*ref_namespace_fingerprint_ids=*/{id2}),
+      IsOk());
+  EXPECT_THAT(
+      index->Put(schema_type_id, joinable_property_id, /*document_id=*/2,
+                 /*ref_namespace_fingerprint_ids=*/{id3}),
+      IsOk());
+  index->set_last_added_document_id(2);
+
+  ASSERT_THAT(index, Pointee(SizeIs(3)));
+
+  // Identity mapping: document ids 0..2 keep their values.
+  std::vector<DocumentId> document_id_old_to_new = {0, 1, 2};
+
+  // Map all three namespace ids (0..2) to kInvalidNamespaceId, i.e. deleted.
+  std::vector<NamespaceId> namespace_id_old_to_new(3, kInvalidNamespaceId);
+
+  EXPECT_THAT(
+      index->Optimize(document_id_old_to_new, namespace_id_old_to_new,
+                      /*new_last_added_document_id=*/kInvalidDocumentId),
+      IsOk());
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  // Every entry referenced a deleted namespace, so nothing should remain.
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+  EXPECT_THAT(GetJoinData(*index, schema_type_id, joinable_property_id),
+              IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexImplV2Test, Clear) {
+  NamespaceFingerprintIdentifier id1(/*namespace_id=*/1, /*fingerprint=*/12);
+  NamespaceFingerprintIdentifier id2(/*namespace_id=*/1, /*fingerprint=*/34);
+  NamespaceFingerprintIdentifier id3(/*namespace_id=*/1, /*fingerprint=*/56);
+  NamespaceFingerprintIdentifier id4(/*namespace_id=*/1, /*fingerprint=*/78);
+
+  // Create a new qualified id join index.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexImplV2> index,
+      QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                         /*pre_mapping_fbv=*/false));
+  // Insert 4 join data entries across two (schema type, property) pairs.
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/5,
+      /*ref_namespace_fingerprint_ids=*/{id2, id1}));
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/3, /*joinable_property_id=*/10, /*document_id=*/6,
+      /*ref_namespace_fingerprint_ids=*/{id3}));
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/12,
+      /*ref_namespace_fingerprint_ids=*/{id4}));
+  ASSERT_THAT(index, Pointee(SizeIs(4)));
+  index->set_last_added_document_id(12);
+  ASSERT_THAT(index->last_added_document_id(), Eq(12));
+
+  // After Clear(), last_added_document_id should be kInvalidDocumentId and all
+  // previously added data should be gone for both (type, property) pairs.
+  EXPECT_THAT(index->Clear(), IsOk());
+  EXPECT_THAT(index, Pointee(IsEmpty()));
+  EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId));
+  EXPECT_THAT(
+      GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+      IsOkAndHolds(IsEmpty()));
+  EXPECT_THAT(
+      GetJoinData(*index, /*schema_type_id=*/3, /*joinable_property_id=*/10),
+      IsOkAndHolds(IsEmpty()));
+
+  // The join index should still accept new data after Clear().
+  ICING_ASSERT_OK(index->Put(
+      /*schema_type_id=*/2, /*joinable_property_id=*/1, /*document_id=*/20,
+      /*ref_namespace_fingerprint_ids=*/{id4, id2, id1, id3}));
+  index->set_last_added_document_id(20);
+
+  EXPECT_THAT(index, Pointee(SizeIs(4)));
+  EXPECT_THAT(index->last_added_document_id(), Eq(20));
+  EXPECT_THAT(
+      GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id4),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id3),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id2),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id1))));
+
+  ICING_ASSERT_OK(index->PersistToDisk());
+  index.reset();
+
+  // Re-create the index from the same working path and verify its contents.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      index, QualifiedIdJoinIndexImplV2::Create(filesystem_, working_path_,
+                                                /*pre_mapping_fbv=*/false));
+  EXPECT_THAT(index->last_added_document_id(), Eq(20));
+  EXPECT_THAT(
+      GetJoinData(*index, /*schema_type_id=*/2, /*joinable_property_id=*/1),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id4),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id3),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id2),
+                      DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+                          /*document_id=*/20, /*join_info=*/id1))));
+}
+
+}  // namespace
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/join/qualified-id-join-index.h b/icing/join/qualified-id-join-index.h
index 86297cd..4e487f9 100644
--- a/icing/join/qualified-id-join-index.h
+++ b/icing/join/qualified-id-join-index.h
@@ -19,81 +19,42 @@
 #include <memory>
 #include <string>
 #include <string_view>
+#include <utility>
 #include <vector>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/file/file-backed-vector.h"
 #include "icing/file/filesystem.h"
 #include "icing/file/persistent-storage.h"
 #include "icing/join/doc-join-info.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
-#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
 #include "icing/util/crc32.h"
 
 namespace icing {
 namespace lib {
 
-// QualifiedIdJoinIndex: a class to maintain data mapping DocJoinInfo to
-// joinable qualified ids and delete propagation info.
+// QualifiedIdJoinIndex: an abstract class to maintain data for qualified id
+// joining.
 class QualifiedIdJoinIndex : public PersistentStorage {
  public:
-  struct Info {
-    static constexpr int32_t kMagic = 0x48cabdc6;
+  class JoinDataIteratorBase {
+   public:
+    virtual ~JoinDataIteratorBase() = default;
 
-    int32_t magic;
-    DocumentId last_added_document_id;
+    virtual libtextclassifier3::Status Advance() = 0;
 
-    Crc32 ComputeChecksum() const {
-      return Crc32(
-          std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
-    }
-  } __attribute__((packed));
-  static_assert(sizeof(Info) == 8, "");
-
-  // Metadata file layout: <Crcs><Info>
-  static constexpr int32_t kCrcsMetadataBufferOffset = 0;
-  static constexpr int32_t kInfoMetadataBufferOffset =
-      static_cast<int32_t>(sizeof(Crcs));
-  static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
-  static_assert(kMetadataFileSize == 20, "");
+    virtual const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>&
+    GetCurrent() const = 0;
+  };
 
   static constexpr WorkingPathType kWorkingPathType =
       WorkingPathType::kDirectory;
 
-  // Creates a QualifiedIdJoinIndex instance to store qualified ids for future
-  // joining search. If any of the underlying file is missing, then delete the
-  // whole working_path and (re)initialize with new ones. Otherwise initialize
-  // and create the instance by existing files.
-  //
-  // filesystem: Object to make system level calls
-  // working_path: Specifies the working path for PersistentStorage.
-  //               QualifiedIdJoinIndex uses working path as working directory
-  //               and all related files will be stored under this directory. It
-  //               takes full ownership and of working_path_, including
-  //               creation/deletion. It is the caller's responsibility to
-  //               specify correct working path and avoid mixing different
-  //               persistent storages together under the same path. Also the
-  //               caller has the ownership for the parent directory of
-  //               working_path_, and it is responsible for parent directory
-  //               creation/deletion. See PersistentStorage for more details
-  //               about the concept of working_path.
-  // pre_mapping_fbv: flag indicating whether memory map max possible file size
-  //                  for underlying FileBackedVector before growing the actual
-  //                  file size.
-  // use_persistent_hash_map: flag indicating whether use persistent hash map as
-  //                          the key mapper (if false, then fall back to
-  //                          dynamic trie key mapper).
-  //
-  // Returns:
-  //   - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
-  //                               checksum
-  //   - INTERNAL_ERROR on I/O errors
-  //   - Any KeyMapper errors
-  static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-  Create(const Filesystem& filesystem, std::string working_path,
-         bool pre_mapping_fbv, bool use_persistent_hash_map);
-
   // Deletes QualifiedIdJoinIndex under working_path.
   //
   // Returns:
@@ -105,17 +66,11 @@ class QualifiedIdJoinIndex : public PersistentStorage {
                                       kWorkingPathType);
   }
 
-  // Delete copy and move constructor/assignment operator.
-  QualifiedIdJoinIndex(const QualifiedIdJoinIndex&) = delete;
-  QualifiedIdJoinIndex& operator=(const QualifiedIdJoinIndex&) = delete;
-
-  QualifiedIdJoinIndex(QualifiedIdJoinIndex&&) = delete;
-  QualifiedIdJoinIndex& operator=(QualifiedIdJoinIndex&&) = delete;
+  virtual ~QualifiedIdJoinIndex() override = default;
 
-  ~QualifiedIdJoinIndex() override;
-
-  // Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
-  // references to ref_qualified_id_str (the identifier of another document).
+  // (v1 only) Puts new data into the index: DocJoinInfo (DocumentId,
+  // JoinablePropertyId) refers to ref_qualified_id_str (the identifier of
+  // another document).
   //
   // REQUIRES: ref_qualified_id_str contains no '\0'.
   //
@@ -123,10 +78,26 @@ class QualifiedIdJoinIndex : public PersistentStorage {
   //   - OK on success
   //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
   //   - Any KeyMapper errors
-  libtextclassifier3::Status Put(const DocJoinInfo& doc_join_info,
-                                 std::string_view ref_qualified_id_str);
+  virtual libtextclassifier3::Status Put(
+      const DocJoinInfo& doc_join_info,
+      std::string_view ref_qualified_id_str) = 0;
 
-  // Gets the referenced document's qualified id string by DocJoinInfo.
+  // (v2 only) Puts a list of referenced NamespaceFingerprintIdentifier into
+  // index, given the DocumentId, SchemaTypeId and JoinablePropertyId.
+  //
+  // Returns:
+  //   - OK on success
+  //   - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
+  //     document_id is invalid
+  //   - Any KeyMapper/FlashIndexStorage errors
+  virtual libtextclassifier3::Status Put(
+      SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
+      DocumentId document_id,
+      std::vector<NamespaceFingerprintIdentifier>&&
+          ref_namespace_fingerprint_ids) = 0;
+
+  // (v1 only) Gets the referenced document's qualified id string by
+  // DocJoinInfo.
   //
   // Returns:
   //   - A qualified id string referenced by the given DocJoinInfo (DocumentId,
@@ -134,8 +105,20 @@ class QualifiedIdJoinIndex : public PersistentStorage {
   //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
   //   - NOT_FOUND_ERROR if doc_join_info doesn't exist
   //   - Any KeyMapper errors
-  libtextclassifier3::StatusOr<std::string_view> Get(
-      const DocJoinInfo& doc_join_info) const;
+  virtual libtextclassifier3::StatusOr<std::string_view> Get(
+      const DocJoinInfo& doc_join_info) const = 0;
+
+  // (v2 only) Returns a JoinDataIterator for iterating through all join data of
+  // the specified (schema_type_id, joinable_property_id).
+  //
+  // Returns:
+  //   - On success: a JoinDataIterator
+  //   - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
+  //     invalid
+  //   - Any KeyMapper/FlashIndexStorage errors
+  virtual libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
+  GetIterator(SchemaTypeId schema_type_id,
+              JoinablePropertyId joinable_property_id) const = 0;
 
   // Reduces internal file sizes by reclaiming space and ids of deleted
   // documents. Qualified id type joinable index will convert all entries to the
@@ -143,6 +126,8 @@ class QualifiedIdJoinIndex : public PersistentStorage {
   //
   // - document_id_old_to_new: a map for converting old document id to new
   //   document id.
+  // - namespace_id_old_to_new: a map for converting old namespace id to new
+  //   namespace id.
   // - new_last_added_document_id: will be used to update the last added
   //                               document id in the qualified id type joinable
   //                               index.
@@ -152,154 +137,48 @@ class QualifiedIdJoinIndex : public PersistentStorage {
   //   - INTERNAL_ERROR on I/O error. This could potentially leave the index in
   //     an invalid state and the caller should handle it properly (e.g. discard
   //     and rebuild)
-  libtextclassifier3::Status Optimize(
+  virtual libtextclassifier3::Status Optimize(
       const std::vector<DocumentId>& document_id_old_to_new,
-      DocumentId new_last_added_document_id);
+      const std::vector<NamespaceId>& namespace_id_old_to_new,
+      DocumentId new_last_added_document_id) = 0;
 
   // Clears all data and set last_added_document_id to kInvalidDocumentId.
   //
   // Returns:
   //   - OK on success
   //   - INTERNAL_ERROR on I/O error
-  libtextclassifier3::Status Clear();
+  virtual libtextclassifier3::Status Clear() = 0;
 
-  int32_t size() const { return doc_join_info_mapper_->num_keys(); }
+  virtual bool is_v2() const = 0;
 
-  bool empty() const { return size() == 0; }
+  virtual int32_t size() const = 0;
 
-  DocumentId last_added_document_id() const {
-    return info().last_added_document_id;
-  }
+  virtual bool empty() const = 0;
 
-  void set_last_added_document_id(DocumentId document_id) {
-    SetInfoDirty();
+  virtual DocumentId last_added_document_id() const = 0;
 
-    Info& info_ref = info();
-    if (info_ref.last_added_document_id == kInvalidDocumentId ||
-        document_id > info_ref.last_added_document_id) {
-      info_ref.last_added_document_id = document_id;
-    }
-  }
+  virtual void set_last_added_document_id(DocumentId document_id) = 0;
 
- private:
-  explicit QualifiedIdJoinIndex(
-      const Filesystem& filesystem, std::string&& working_path,
-      std::unique_ptr<uint8_t[]> metadata_buffer,
-      std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
-      std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
-      bool pre_mapping_fbv, bool use_persistent_hash_map)
+ protected:
+  explicit QualifiedIdJoinIndex(const Filesystem& filesystem,
+                                std::string&& working_path)
       : PersistentStorage(filesystem, std::move(working_path),
-                          kWorkingPathType),
-        metadata_buffer_(std::move(metadata_buffer)),
-        doc_join_info_mapper_(std::move(doc_join_info_mapper)),
-        qualified_id_storage_(std::move(qualified_id_storage)),
-        pre_mapping_fbv_(pre_mapping_fbv),
-        use_persistent_hash_map_(use_persistent_hash_map),
-        is_info_dirty_(false),
-        is_storage_dirty_(false) {}
-
-  static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-  InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
-                     bool pre_mapping_fbv, bool use_persistent_hash_map);
-
-  static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
-  InitializeExistingFiles(const Filesystem& filesystem,
-                          std::string&& working_path, bool pre_mapping_fbv,
-                          bool use_persistent_hash_map);
-
-  // Transfers qualified id type joinable index data from the current to
-  // new_index and convert to new document id according to
-  // document_id_old_to_new. It is a helper function for Optimize.
-  //
-  // Returns:
-  //   - OK on success
-  //   - INTERNAL_ERROR on I/O error
-  libtextclassifier3::Status TransferIndex(
-      const std::vector<DocumentId>& document_id_old_to_new,
-      QualifiedIdJoinIndex* new_index) const;
-
-  // Flushes contents of metadata file.
-  //
-  // Returns:
-  //   - OK on success
-  //   - INTERNAL_ERROR on I/O error
-  libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
-
-  // Flushes contents of all storages to underlying files.
-  //
-  // Returns:
-  //   - OK on success
-  //   - INTERNAL_ERROR on I/O error
-  libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
-
-  // Computes and returns Info checksum.
-  //
-  // Returns:
-  //   - Crc of the Info on success
-  libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
-
-  // Computes and returns all storages checksum.
-  //
-  // Returns:
-  //   - Crc of all storages on success
-  //   - INTERNAL_ERROR if any data inconsistency
-  libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
-      bool force) override;
-
-  Crcs& crcs() override {
-    return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
-                                    kCrcsMetadataBufferOffset);
-  }
-
-  const Crcs& crcs() const override {
-    return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
-                                          kCrcsMetadataBufferOffset);
-  }
-
-  Info& info() {
-    return *reinterpret_cast<Info*>(metadata_buffer_.get() +
-                                    kInfoMetadataBufferOffset);
-  }
-
-  const Info& info() const {
-    return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
-                                          kInfoMetadataBufferOffset);
-  }
-
-  void SetInfoDirty() { is_info_dirty_ = true; }
-  // When storage is dirty, we have to set info dirty as well. So just expose
-  // SetDirty to set both.
-  void SetDirty() {
-    is_info_dirty_ = true;
-    is_storage_dirty_ = true;
-  }
-
-  bool is_info_dirty() const { return is_info_dirty_; }
-  bool is_storage_dirty() const { return is_storage_dirty_; }
-
-  // Metadata buffer
-  std::unique_ptr<uint8_t[]> metadata_buffer_;
-
-  // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
-  // JoinablePropertyId) to another referenced document's qualified id string
-  // index in qualified_id_storage_.
-  std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
+                          kWorkingPathType) {}
 
-  // Storage for qualified id strings.
-  std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
+  virtual libtextclassifier3::Status PersistStoragesToDisk(
+      bool force) override = 0;
 
-  // TODO(b/268521214): add delete propagation storage
+  virtual libtextclassifier3::Status PersistMetadataToDisk(
+      bool force) override = 0;
 
-  // Flag indicating whether memory map max possible file size for underlying
-  // FileBackedVector before growing the actual file size.
-  bool pre_mapping_fbv_;
+  virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+      bool force) override = 0;
 
-  // Flag indicating whether use persistent hash map as the key mapper (if
-  // false, then fall back to dynamic trie key mapper).
-  bool use_persistent_hash_map_;
+  virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+      bool force) override = 0;
 
-  bool is_info_dirty_;
-  bool is_storage_dirty_;
+  virtual Crcs& crcs() override = 0;
+  virtual const Crcs& crcs() const override = 0;
 };
 
 }  // namespace lib
diff --git a/icing/join/qualified-id-join-indexing-handler-v1_test.cc b/icing/join/qualified-id-join-indexing-handler-v1_test.cc
new file mode 100644
index 0000000..9700132
--- /dev/null
+++ b/icing/join/qualified-id-join-indexing-handler-v1_test.cc
@@ -0,0 +1,558 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/qualified-id-join-index-impl-v1.h"
+#include "icing/join/qualified-id-join-index.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
+#include "icing/join/qualified-id.h"
+#include "icing/portable/platform.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/joinable-property.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/icu-data-file-helper.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::IsTrue;
+
+// Schema type for referenced documents: ReferencedType
+static constexpr std::string_view kReferencedType = "ReferencedType";
+static constexpr std::string_view kPropertyName = "name";
+
+// Joinable properties and their joinable property ids. A joinable property id
+// is determined by the lexicographical order of the joinable property paths.
+// Schema type with joinable property: FakeType
+static constexpr std::string_view kFakeType = "FakeType";
+static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
+
+static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
+
+// Schema type with nested joinable properties: NestedType
+static constexpr std::string_view kNestedType = "NestedType";
+static constexpr std::string_view kPropertyNestedDoc = "nested";
+static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
+// Ids below follow path order: nested qualifiedId (0), then qualifiedId2 (1).
+static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
+static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
+
+static constexpr DocumentId kDefaultDocumentId = 3;  // Id passed to Handle().
+
+// Exercises QualifiedIdJoinIndexingHandler against QualifiedIdJoinIndexImplV1.
+// TODO(b/275121148): remove this test after deprecating the V1 index.
+class QualifiedIdJoinIndexingHandlerV1Test : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+      ICING_ASSERT_OK(
+          // File generated via icu_data_file rule in //icing/BUILD.
+          icu_data_file_helper::SetUpICUDataFile(
+              GetTestFilePath("icing/icu.dat")));
+    }
+
+    base_dir_ = GetTestTempDir() + "/icing_test";
+    ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
+                IsTrue());
+
+    qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
+    schema_store_dir_ = base_dir_ + "/schema_store";
+    doc_store_dir_ = base_dir_ + "/doc_store";
+
+    ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+                               QualifiedIdJoinIndexImplV1::Create(
+                                   filesystem_, qualified_id_join_index_dir_,
+                                   /*pre_mapping_fbv=*/false,
+                                   /*use_persistent_hash_map=*/false));
+
+    language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+    ICING_ASSERT_OK_AND_ASSIGN(
+        lang_segmenter_,
+        language_segmenter_factory::Create(std::move(segmenter_options)));
+
+    ASSERT_THAT(
+        filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
+        IsTrue());
+    ICING_ASSERT_OK_AND_ASSIGN(
+        schema_store_,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    SchemaProto schema =
+        SchemaBuilder()
+            .AddType(
+                SchemaTypeConfigBuilder()
+                    .SetType(kReferencedType)
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName(kPropertyName)
+                                     .SetDataTypeString(TERM_MATCH_EXACT,
+                                                        TOKENIZER_PLAIN)
+                                     .SetCardinality(CARDINALITY_OPTIONAL)))
+            .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
+                PropertyConfigBuilder()
+                    .SetName(kPropertyQualifiedId)
+                    .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+                    .SetCardinality(CARDINALITY_OPTIONAL)))
+            .AddType(
+                SchemaTypeConfigBuilder()
+                    .SetType(kNestedType)
+                    .AddProperty(
+                        PropertyConfigBuilder()
+                            .SetName(kPropertyNestedDoc)
+                            .SetDataTypeDocument(
+                                kFakeType, /*index_nested_properties=*/true)
+                            .SetCardinality(CARDINALITY_OPTIONAL))
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName(kPropertyQualifiedId2)
+                                     .SetDataTypeJoinableString(
+                                         JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+                                     .SetCardinality(CARDINALITY_OPTIONAL)))
+            .Build();
+    ICING_ASSERT_OK(schema_store_->SetSchema(
+        schema, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+                IsTrue());
+    ICING_ASSERT_OK_AND_ASSIGN(
+        DocumentStore::CreateResult create_result,
+        DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+                              schema_store_.get(),
+                              /*force_recovery_and_revalidate_documents=*/false,
+                              /*namespace_id_fingerprint=*/false,
+                              /*pre_mapping_fbv=*/false,
+                              /*use_persistent_hash_map=*/false,
+                              PortableFileBackedProtoLog<
+                                  DocumentWrapper>::kDeflateCompressionLevel,
+                              /*initialize_stats=*/nullptr));
+    doc_store_ = std::move(create_result.document_store);
+  }
+
+  void TearDown() override {
+    doc_store_.reset();
+    schema_store_.reset();
+    lang_segmenter_.reset();
+    qualified_id_join_index_.reset();
+    // All components are destroyed above, before base_dir_ is deleted.
+    filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+  }
+
+  Filesystem filesystem_;
+  FakeClock fake_clock_;
+  std::string base_dir_;  // Test root directory; deleted in TearDown().
+  std::string qualified_id_join_index_dir_;
+  std::string schema_store_dir_;
+  std::string doc_store_dir_;
+
+  std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;  // V1 impl.
+  std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> doc_store_;
+};
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+       CreationWithNullPointerShouldFail) {
+  EXPECT_THAT(
+      QualifiedIdJoinIndexingHandler::Create(
+          /*clock=*/nullptr, doc_store_.get(), qualified_id_join_index_.get()),
+      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+  EXPECT_THAT(
+      QualifiedIdJoinIndexingHandler::Create(
+          &fake_clock_, /*doc_store=*/nullptr, qualified_id_join_index_.get()),
+      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+  EXPECT_THAT(
+      QualifiedIdJoinIndexingHandler::Create(
+          &fake_clock_, doc_store_.get(), /*qualified_id_join_index=*/nullptr),
+      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleJoinableProperty) {
+  DocumentProto referenced_document =  // NOTE(review): never used below.
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/1")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "one")
+          .Build();
+
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(std::string(kPropertyQualifiedId),
+                             "pkg$db/ns#ref_type/1")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kInvalidDocumentId));
+  // Handle document. Its qualified id string should end up in the index.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId,
+                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      IsOk());
+
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleNestedJoinableProperty) {
+  DocumentProto referenced_document1 =  // NOTE(review): never used below.
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/1")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "one")
+          .Build();
+  DocumentProto referenced_document2 =  // NOTE(review): never used below.
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/2")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "two")
+          .Build();
+
+  DocumentProto nested_document =
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "nested_type/1")
+          .SetSchema(std::string(kNestedType))
+          .AddDocumentProperty(
+              std::string(kPropertyNestedDoc),
+              DocumentBuilder()
+                  .SetKey("pkg$db/ns", "nested_fake_type/1")
+                  .SetSchema(std::string(kFakeType))
+                  .AddStringProperty(std::string(kPropertyQualifiedId),
+                                     "pkg$db/ns#ref_type/2")
+                  .Build())
+          .AddStringProperty(std::string(kPropertyQualifiedId2),
+                             "pkg$db/ns#ref_type/1")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                nested_document));
+
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kInvalidDocumentId));
+  // Handle nested_document. Both of its qualified ids should be indexed.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+  EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
+                              /*recovery_mode=*/false,
+                              /*put_document_stats=*/nullptr),
+              IsOk());
+
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
+              IsOkAndHolds("pkg$db/ns#ref_type/2"));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
+              IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+       HandleShouldSkipInvalidFormatQualifiedId) {
+  static constexpr std::string_view kInvalidFormatQualifiedId =
+      "invalid_format_qualified_id";
+  ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(std::string(kPropertyQualifiedId),
+                             std::string(kInvalidFormatQualifiedId))
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kInvalidDocumentId));
+  // Handle document. Should ignore invalid format qualified id.
+  // Index data should remain unchanged since there is no valid qualified id,
+  // but last_added_document_id should be updated.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId,
+                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      IsOk());
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test, HandleShouldSkipEmptyQualifiedId) {
+  // Create a document without any qualified id.
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("icing", "fake_type/1")
+                               .SetSchema(std::string(kFakeType))
+                               .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
+
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kInvalidDocumentId));
+  // Handle document. Index data should remain unchanged since there is no
+  // qualified id, but last_added_document_id should be updated.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId,
+                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      IsOk());
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+       HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+  DocumentProto referenced_document =  // NOTE(review): never used below.
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/1")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "one")
+          .Build();
+
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(std::string(kPropertyQualifiedId),
+                             "pkg$db/ns#ref_type/1")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+
+  qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+
+  // Handling document with kInvalidDocumentId should cause a failure, and both
+  // index data and last_added_document_id should remain unchanged.
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kInvalidDocumentId,
+                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // Recovery mode (recovery_mode=true) should get exactly the same result.
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kInvalidDocumentId,
+                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+       HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+  DocumentProto referenced_document =  // NOTE(review): never used below.
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/1")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "one")
+          .Build();
+
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(std::string(kPropertyQualifiedId),
+                             "pkg$db/ns#ref_type/1")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+
+  qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+
+  // Handling document with document_id < last_added_document_id should cause a
+  // failure, and both index data and last_added_document_id should remain
+  // unchanged.
+  ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // Handling document with document_id == last_added_document_id should cause a
+  // failure, and both index data and last_added_document_id should remain
+  // unchanged.
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId,
+                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerV1Test,
+       HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+  DocumentProto referenced_document =  // NOTE(review): never used below.
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/1")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "one")
+          .Build();
+
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(std::string(kPropertyQualifiedId),
+                             "pkg$db/ns#ref_type/1")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+
+  qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+
+  // Handle document with document_id < last_added_document_id in recovery mode.
+  // We should not get any error, but the handler should ignore the document, so
+  // both index data and last_added_document_id should remain unchanged.
+  ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId - 1,
+                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      IsOk());
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // Handle document with document_id == last_added_document_id in recovery
+  // mode. We should not get any error, but the handler should ignore the
+  // document, so both index data and last_added_document_id should remain
+  // unchanged.
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId,
+                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      IsOk());
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // Handle document with document_id > last_added_document_id in recovery mode.
+  // The handler should index this document and update last_added_document_id.
+  ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue());
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, kDefaultDocumentId + 1,
+                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      IsOk());
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kDefaultDocumentId + 1));
+  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
+                  kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)),
+              IsOkAndHolds("pkg$db/ns#ref_type/1"));
+}
+
+}  // namespace
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/join/qualified-id-join-indexing-handler.cc b/icing/join/qualified-id-join-indexing-handler.cc
index 344cf41..df86cba 100644
--- a/icing/join/qualified-id-join-indexing-handler.cc
+++ b/icing/join/qualified-id-join-indexing-handler.cc
@@ -14,8 +14,13 @@
 
 #include "icing/join/qualified-id-join-indexing-handler.h"
 
+#include <cstdint>
+#include <limits>
 #include <memory>
+#include <optional>
 #include <string_view>
+#include <utility>
+#include <vector>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -26,7 +31,11 @@
 #include "icing/legacy/core/icing-string-util.h"
 #include "icing/proto/logging.pb.h"
 #include "icing/schema/joinable-property.h"
+#include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
 #include "icing/util/clock.h"
 #include "icing/util/logging.h"
 #include "icing/util/status-macros.h"
@@ -38,12 +47,15 @@ namespace lib {
 /* static */ libtextclassifier3::StatusOr<
     std::unique_ptr<QualifiedIdJoinIndexingHandler>>
 QualifiedIdJoinIndexingHandler::Create(
-    const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index) {
+    const Clock* clock, const DocumentStore* doc_store,
+    QualifiedIdJoinIndex* qualified_id_join_index) {
   ICING_RETURN_ERROR_IF_NULL(clock);
+  ICING_RETURN_ERROR_IF_NULL(doc_store);
   ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
 
   return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
-      new QualifiedIdJoinIndexingHandler(clock, qualified_id_join_index));
+      new QualifiedIdJoinIndexingHandler(clock, doc_store,
+                                         qualified_id_join_index));
 }
 
 libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
@@ -69,30 +81,89 @@ libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
   }
   qualified_id_join_index_.set_last_added_document_id(document_id);
 
-  for (const JoinableProperty<std::string_view>& qualified_id_property :
-       tokenized_document.qualified_id_join_properties()) {
-    if (qualified_id_property.values.empty()) {
-      continue;
+  if (qualified_id_join_index_.is_v2()) {
+    // v2: batch join data per (schema_type_id, joinable_property_id).
+    std::optional<DocumentFilterData> filter_data =
+        doc_store_.GetAliveDocumentFilterData(
+            document_id,
+            /*current_time_ms=*/std::numeric_limits<int64_t>::min());
+    if (!filter_data) {
+      // This should not happen.
+      return absl_ports::InternalError(
+          "Failed to get alive document filter data when indexing");
     }
 
-    DocJoinInfo info(document_id, qualified_id_property.metadata.id);
-    // Currently we only support single (non-repeated) joinable value under a
-    // property.
-    std::string_view ref_qualified_id_str = qualified_id_property.values[0];
-
-    // Attempt to parse qualified id string to make sure the format is correct.
-    if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
-      // Skip incorrect format of qualified id string to save disk space.
-      continue;
+    for (const JoinableProperty<std::string_view>& qualified_id_property :
+         tokenized_document.qualified_id_join_properties()) {
+      // Parse all qualified id strings and convert them to
+      // NamespaceFingerprintIdentifier.
+      std::vector<NamespaceFingerprintIdentifier> ref_doc_ns_fingerprint_ids;
+      for (std::string_view ref_qualified_id_str :
+           qualified_id_property.values) {
+        // Attempt to parse qualified id string to make sure the format is
+        // correct.
+        auto ref_qualified_id_or = QualifiedId::Parse(ref_qualified_id_str);
+        if (!ref_qualified_id_or.ok()) {
+          // Skip incorrect format of qualified id string.
+          continue;
+        }
+
+        QualifiedId ref_qualified_id =
+            std::move(ref_qualified_id_or).ValueOrDie();
+        auto ref_namespace_id_or =
+            doc_store_.GetNamespaceId(ref_qualified_id.name_space());
+        if (!ref_namespace_id_or.ok()) {
+          // Skip references whose namespace id cannot be resolved.
+          continue;
+        }
+        NamespaceId ref_namespace_id =
+            std::move(ref_namespace_id_or).ValueOrDie();
+
+        ref_doc_ns_fingerprint_ids.push_back(NamespaceFingerprintIdentifier(
+            ref_namespace_id, ref_qualified_id.uri()));
+      }
+
+      // Batch add all join data of this (schema_type_id, joinable_property_id)
+      // into the index.
+      libtextclassifier3::Status status = qualified_id_join_index_.Put(
+          filter_data->schema_type_id(), qualified_id_property.metadata.id,
+          document_id, std::move(ref_doc_ns_fingerprint_ids));
+      if (!status.ok()) {
+        ICING_LOG(WARNING)
+            << "Failed to add data into qualified id join index v2 due to: "
+            << status.error_message();
+        return status;
+      }
     }
-
-    libtextclassifier3::Status status =
-        qualified_id_join_index_.Put(info, ref_qualified_id_str);
-    if (!status.ok()) {
-      ICING_LOG(WARNING)
-          << "Failed to add data into qualified id join index due to: "
-          << status.error_message();
-      return status;
+  } else {
+    // v1
+    // TODO(b/275121148): deprecate this part after rolling out v2.
+    for (const JoinableProperty<std::string_view>& qualified_id_property :
+         tokenized_document.qualified_id_join_properties()) {
+      if (qualified_id_property.values.empty()) {
+        continue;
+      }
+
+      DocJoinInfo info(document_id, qualified_id_property.metadata.id);
+      // Currently we only support single (non-repeated) joinable value under a
+      // property.
+      std::string_view ref_qualified_id_str = qualified_id_property.values[0];
+
+      // Attempt to parse qualified id string to make sure the format is
+      // correct.
+      if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
+        // Skip incorrect format of qualified id string to save disk space.
+        continue;
+      }
+
+      libtextclassifier3::Status status =
+          qualified_id_join_index_.Put(info, ref_qualified_id_str);
+      if (!status.ok()) {
+        ICING_LOG(WARNING)
+            << "Failed to add data into qualified id join index due to: "
+            << status.error_message();
+        return status;
+      }
     }
   }
 
diff --git a/icing/join/qualified-id-join-indexing-handler.h b/icing/join/qualified-id-join-indexing-handler.h
index f44e45d..8a11bf9 100644
--- a/icing/join/qualified-id-join-indexing-handler.h
+++ b/icing/join/qualified-id-join-indexing-handler.h
@@ -15,11 +15,15 @@
 #ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
 #define ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
 
+#include <memory>
+
 #include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "icing/index/data-indexing-handler.h"
 #include "icing/join/qualified-id-join-index.h"
 #include "icing/proto/logging.pb.h"
 #include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
 #include "icing/util/clock.h"
 #include "icing/util/tokenized-document.h"
 
@@ -37,7 +41,8 @@ class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
   //   - FAILED_PRECONDITION_ERROR if any of the input pointer is null
   static libtextclassifier3::StatusOr<
       std::unique_ptr<QualifiedIdJoinIndexingHandler>>
-  Create(const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index);
+  Create(const Clock* clock, const DocumentStore* doc_store,
+         QualifiedIdJoinIndex* qualified_id_join_index);
 
   ~QualifiedIdJoinIndexingHandler() override = default;
 
@@ -57,10 +62,13 @@ class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
 
  private:
   explicit QualifiedIdJoinIndexingHandler(
-      const Clock* clock, QualifiedIdJoinIndex* qualified_id_join_index)
+      const Clock* clock, const DocumentStore* doc_store,
+      QualifiedIdJoinIndex* qualified_id_join_index)
       : DataIndexingHandler(clock),
+        doc_store_(*doc_store),
         qualified_id_join_index_(*qualified_id_join_index) {}
 
+  const DocumentStore& doc_store_;                 // Does not own.
   QualifiedIdJoinIndex& qualified_id_join_index_;  // Does not own.
 };
 
diff --git a/icing/join/qualified-id-join-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc
index 7e89dfa..53d35c7 100644
--- a/icing/join/qualified-id-join-indexing-handler_test.cc
+++ b/icing/join/qualified-id-join-indexing-handler_test.cc
@@ -17,12 +17,19 @@
 #include <memory>
 #include <string>
 #include <string_view>
+#include <utility>
+#include <vector>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
 #include "icing/document-builder.h"
 #include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/document-id-to-join-info.h"
+#include "icing/join/qualified-id-join-index-impl-v2.h"
 #include "icing/join/qualified-id-join-index.h"
 #include "icing/join/qualified-id.h"
 #include "icing/portable/platform.h"
@@ -31,7 +38,11 @@
 #include "icing/schema-builder.h"
 #include "icing/schema/joinable-property.h"
 #include "icing/schema/schema-store.h"
+#include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
+#include "icing/store/namespace-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/fake-clock.h"
 #include "icing/testing/icu-data-file-helper.h"
@@ -39,6 +50,7 @@
 #include "icing/testing/tmp-directory.h"
 #include "icing/tokenization/language-segmenter-factory.h"
 #include "icing/tokenization/language-segmenter.h"
+#include "icing/util/status-macros.h"
 #include "icing/util/tokenized-document.h"
 #include "unicode/uloc.h"
 
@@ -47,9 +59,11 @@ namespace lib {
 
 namespace {
 
+using ::testing::ElementsAre;
 using ::testing::Eq;
 using ::testing::IsEmpty;
 using ::testing::IsTrue;
+using ::testing::NotNull;
 
 // Schema type for referenced documents: ReferencedType
 static constexpr std::string_view kReferencedType = "ReferencedType";
@@ -61,18 +75,11 @@ static constexpr std::string_view kPropertyName = "name";
 static constexpr std::string_view kFakeType = "FakeType";
 static constexpr std::string_view kPropertyQualifiedId = "qualifiedId";
 
-static constexpr JoinablePropertyId kQualifiedIdJoinablePropertyId = 0;
-
 // Schema type with nested joinable properties: NestedType
 static constexpr std::string_view kNestedType = "NestedType";
 static constexpr std::string_view kPropertyNestedDoc = "nested";
 static constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
 
-static constexpr JoinablePropertyId kNestedQualifiedIdJoinablePropertyId = 0;
-static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
-
-static constexpr DocumentId kDefaultDocumentId = 3;
-
 class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
  protected:
   void SetUp() override {
@@ -89,12 +96,12 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
 
     qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
     schema_store_dir_ = base_dir_ + "/schema_store";
+    doc_store_dir_ = base_dir_ + "/doc_store";
 
-    ICING_ASSERT_OK_AND_ASSIGN(
-        qualified_id_join_index_,
-        QualifiedIdJoinIndex::Create(filesystem_, qualified_id_join_index_dir_,
-                                     /*pre_mapping_fbv=*/false,
-                                     /*use_persistent_hash_map=*/false));
+    ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
+                               QualifiedIdJoinIndexImplV2::Create(
+                                   filesystem_, qualified_id_join_index_dir_,
+                                   /*pre_mapping_fbv=*/false));
 
     language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
     ICING_ASSERT_OK_AND_ASSIGN(
@@ -140,9 +147,52 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
     ICING_ASSERT_OK(schema_store_->SetSchema(
         schema, /*ignore_errors_and_delete_documents=*/false,
         /*allow_circular_schema_definitions=*/false));
+
+    ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
+                IsTrue());
+    ICING_ASSERT_OK_AND_ASSIGN(
+        DocumentStore::CreateResult create_result,
+        DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+                              schema_store_.get(),
+                              /*force_recovery_and_revalidate_documents=*/false,
+                              /*namespace_id_fingerprint=*/true,
+                              /*pre_mapping_fbv=*/false,
+                              /*use_persistent_hash_map=*/false,
+                              PortableFileBackedProtoLog<
+                                  DocumentWrapper>::kDeflateCompressionLevel,
+                              /*initialize_stats=*/nullptr));
+    doc_store_ = std::move(create_result.document_store);
+
+    // Get FakeType related ids.
+    ICING_ASSERT_OK_AND_ASSIGN(fake_type_id_,
+                               schema_store_->GetSchemaTypeId(kFakeType));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        const JoinablePropertyMetadata* metadata1,
+        schema_store_->GetJoinablePropertyMetadata(
+            fake_type_id_, std::string(kPropertyQualifiedId)));
+    ASSERT_THAT(metadata1, NotNull());
+    fake_type_joinable_property_id_ = metadata1->id;
+
+    // Get NestedType related ids.
+    ICING_ASSERT_OK_AND_ASSIGN(nested_type_id_,
+                               schema_store_->GetSchemaTypeId(kNestedType));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        const JoinablePropertyMetadata* metadata2,
+        schema_store_->GetJoinablePropertyMetadata(
+            nested_type_id_,
+            absl_ports::StrCat(kPropertyNestedDoc, ".", kPropertyQualifiedId)));
+    ASSERT_THAT(metadata2, NotNull());
+    nested_type_nested_joinable_property_id_ = metadata2->id;
+    ICING_ASSERT_OK_AND_ASSIGN(
+        const JoinablePropertyMetadata* metadata3,
+        schema_store_->GetJoinablePropertyMetadata(
+            nested_type_id_, std::string(kPropertyQualifiedId2)));
+    ASSERT_THAT(metadata3, NotNull());
+    nested_type_joinable_property_id_ = metadata3->id;
   }
 
   void TearDown() override {
+    doc_store_.reset();
     schema_store_.reset();
     lang_segmenter_.reset();
     qualified_id_join_index_.reset();
@@ -155,30 +205,77 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
   std::string base_dir_;
   std::string qualified_id_join_index_dir_;
   std::string schema_store_dir_;
+  std::string doc_store_dir_;
 
-  std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index_;
+  std::unique_ptr<QualifiedIdJoinIndexImplV2> qualified_id_join_index_;
   std::unique_ptr<LanguageSegmenter> lang_segmenter_;
   std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> doc_store_;
+
+  // FakeType related ids.
+  SchemaTypeId fake_type_id_;
+  JoinablePropertyId fake_type_joinable_property_id_;
+
+  // NestedType related ids.
+  SchemaTypeId nested_type_id_;
+  JoinablePropertyId nested_type_nested_joinable_property_id_;
+  JoinablePropertyId nested_type_joinable_property_id_;
 };
 
+libtextclassifier3::StatusOr<
+    std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
+GetJoinData(const QualifiedIdJoinIndexImplV2& index,
+            SchemaTypeId schema_type_id,
+            JoinablePropertyId joinable_property_id) {
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
+      index.GetIterator(schema_type_id, joinable_property_id));
+
+  std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
+  while (iter->Advance().ok()) {
+    result.push_back(iter->GetCurrent());
+  }
+
+  return result;
+}
+
 TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
-  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
-                  /*clock=*/nullptr, qualified_id_join_index_.get()),
-              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+  EXPECT_THAT(
+      QualifiedIdJoinIndexingHandler::Create(
+          /*clock=*/nullptr, doc_store_.get(), qualified_id_join_index_.get()),
+      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
 
-  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
-                  &fake_clock_, /*qualified_id_join_index=*/nullptr),
-              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+  EXPECT_THAT(
+      QualifiedIdJoinIndexingHandler::Create(
+          &fake_clock_, /*doc_store=*/nullptr, qualified_id_join_index_.get()),
+      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+  EXPECT_THAT(
+      QualifiedIdJoinIndexingHandler::Create(
+          &fake_clock_, doc_store_.get(), /*qualified_id_join_index=*/nullptr),
+      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
+  // Create and put referenced (parent) document. Get its document id and
+  // namespace id.
   DocumentProto referenced_document =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "ref_type/1")
           .SetSchema(std::string(kReferencedType))
           .AddStringProperty(std::string(kPropertyName), "one")
           .Build();
-
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+                             doc_store_->Put(referenced_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id,
+      doc_store_->GetNamespaceId(referenced_document.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+      /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+              IsOkAndHolds(ref_doc_id));
+
+  // Create and put (child) document. Also tokenize it.
   DocumentProto document =
       DocumentBuilder()
           .SetKey("icing", "fake_type/1")
@@ -186,44 +283,81 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
           .AddStringProperty(std::string(kPropertyQualifiedId),
                              "pkg$db/ns#ref_type/1")
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
-                                document));
+                                std::move(document)));
 
+  // Handle document.
   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
               Eq(kInvalidDocumentId));
-  // Handle document.
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId,
-                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
       IsOk());
 
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              IsOkAndHolds("pkg$db/ns#ref_type/1"));
+  // Verify the state of qualified_id_join_index_ after Handle().
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain
+  // [(doc_id, ref_doc_ns_fingerprint_id)].
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/doc_id,
+              /*join_info=*/ref_doc_ns_fingerprint_id))));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
+  // Create and put referenced (parent) document1. Get its document id and
+  // namespace id.
   DocumentProto referenced_document1 =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "ref_type/1")
           .SetSchema(std::string(kReferencedType))
           .AddStringProperty(std::string(kPropertyName), "one")
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id1,
+                             doc_store_->Put(referenced_document1));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id1,
+      doc_store_->GetNamespaceId(referenced_document1.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id1(
+      /*namespace_id=*/ref_doc_ns_id1,
+      /*target_str=*/referenced_document1.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id1),
+              IsOkAndHolds(ref_doc_id1));
+
+  // Create and put referenced (parent) document2. Get its document id and
+  // namespace id.
   DocumentProto referenced_document2 =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "ref_type/2")
           .SetSchema(std::string(kReferencedType))
           .AddStringProperty(std::string(kPropertyName), "two")
           .Build();
-
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id2,
+                             doc_store_->Put(referenced_document2));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id2,
+      doc_store_->GetNamespaceId(referenced_document2.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id2(
+      /*namespace_id=*/ref_doc_ns_id2,
+      /*target_str=*/referenced_document2.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id2),
+              IsOkAndHolds(ref_doc_id2));
+
+  // Create and put (child) document:
+  // - kPropertyNestedDoc.kPropertyQualifiedId refers to referenced_document2.
+  // - kPropertyQualifiedId2 refers to referenced_document1.
+  //
+  // Also tokenize it.
   DocumentProto nested_document =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "nested_type/1")
@@ -239,31 +373,51 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
           .AddStringProperty(std::string(kPropertyQualifiedId2),
                              "pkg$db/ns#ref_type/1")
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
+                             doc_store_->Put(nested_document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                 nested_document));
 
+  // Handle nested_document.
   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
               Eq(kInvalidDocumentId));
-  // Handle nested_document.
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
-  EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
-                              /*recovery_mode=*/false,
-                              /*put_document_stats=*/nullptr),
-              IsOk());
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
+      IsOk());
 
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kNestedQualifiedIdJoinablePropertyId)),
-              IsOkAndHolds("pkg$db/ns#ref_type/2"));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedId2JoinablePropertyId)),
-              IsOkAndHolds("pkg$db/ns#ref_type/1"));
+  // Verify the state of qualified_id_join_index_ after Handle().
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
+  // (kNestedType, kPropertyNestedDoc.kPropertyQualifiedId) should contain
+  // [(doc_id, ref_doc_ns_fingerprint_id2)].
+  EXPECT_THAT(
+      GetJoinData(
+          *qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
+          /*joinable_property_id=*/nested_type_nested_joinable_property_id_),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/doc_id,
+              /*join_info=*/ref_doc_ns_fingerprint_id2))));
+  // (kNestedType, kPropertyQualifiedId2) should contain
+  // [(doc_id, ref_doc_ns_fingerprint_id1)].
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
+                  /*joinable_property_id=*/nested_type_joinable_property_id_),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/doc_id,
+              /*join_info=*/ref_doc_ns_fingerprint_id1))));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest,
@@ -273,6 +427,8 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
   ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 
+  // Create and put (child) document with an invalid format referenced qualified
+  // id. Also tokenize it.
   DocumentProto document =
       DocumentBuilder()
           .SetKey("icing", "fake_type/1")
@@ -280,71 +436,133 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
           .AddStringProperty(std::string(kPropertyQualifiedId),
                              std::string(kInvalidFormatQualifiedId))
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                 document));
 
+  // Handle document. Should ignore invalid format qualified id.
   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
               Eq(kInvalidDocumentId));
-  // Handle document. Should ignore invalid format qualified id.
-  // Index data should remain unchanged since there is no valid qualified id,
-  // but last_added_document_id should be updated.
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId,
-                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
       IsOk());
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // Verify the state of qualified_id_join_index_ after Handle(). Index data
+  // should remain unchanged since there is no valid qualified id, but
+  // last_added_document_id should be updated.
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+       HandleShouldSkipNonExistingNamespace) {
+  static constexpr std::string_view kUnknownNamespace = "UnknownNamespace";
+  // Create and put (child) document which refers to a parent qualified id
+  // with an unknown namespace. Also tokenize it.
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(
+              std::string(kPropertyQualifiedId),
+              absl_ports::StrCat(kUnknownNamespace, "#", "ref_type/1"))
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                std::move(document)));
+
+  // Handle document.
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(kInvalidDocumentId));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
+  EXPECT_THAT(
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
+      IsOk());
+
+  // Verify the state of qualified_id_join_index_ after Handle().
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should be empty since
+  // "UnknownNamespace#ref_type/1" should be skipped.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
-  // Create a document without any qualified id.
+  // Create and put (child) document without any qualified id. Also tokenize it.
   DocumentProto document = DocumentBuilder()
                                .SetKey("icing", "fake_type/1")
                                .SetSchema(std::string(kFakeType))
                                .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                 document));
   ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty());
 
+  // Handle document.
   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
               Eq(kInvalidDocumentId));
-  // Handle document. Index data should remain unchanged since there is no
-  // qualified id, but last_added_document_id should be updated.
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId,
-                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
       IsOk());
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // Verify the state of qualified_id_join_index_ after Handle(). Index data
+  // should remain unchanged since there is no qualified id, but
+  // last_added_document_id should be updated.
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest,
        HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
+  // Create and put referenced (parent) document. Get its document id and
+  // namespace id.
   DocumentProto referenced_document =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "ref_type/1")
           .SetSchema(std::string(kReferencedType))
           .AddStringProperty(std::string(kPropertyName), "one")
           .Build();
-
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+                             doc_store_->Put(referenced_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id,
+      doc_store_->GetNamespaceId(referenced_document.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+      /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+              IsOkAndHolds(ref_doc_id));
+
+  // Create and put (child) document. Also tokenize it.
   DocumentProto document =
       DocumentBuilder()
           .SetKey("icing", "fake_type/1")
@@ -352,31 +570,35 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
           .AddStringProperty(std::string(kPropertyQualifiedId),
                              "pkg$db/ns#ref_type/1")
           .Build();
+  ICING_ASSERT_OK(doc_store_->Put(document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
-                                document));
+                                std::move(document)));
 
-  qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
+  qualified_id_join_index_->set_last_added_document_id(ref_doc_id);
   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
+              Eq(ref_doc_id));
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
 
-  // Handling document with kInvalidDocumentId should cause a failure, and both
-  // index data and last_added_document_id should remain unchanged.
+  // Handling document with kInvalidDocumentId should cause a failure.
   EXPECT_THAT(
       handler->Handle(tokenized_document, kInvalidDocumentId,
                       /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  // Verify the state of qualified_id_join_index_ after Handle(). Both index
+  // data and last_added_document_id should remain unchanged.
   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+              Eq(ref_doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 
   // Recovery mode should get the same result.
   EXPECT_THAT(
@@ -384,21 +606,35 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
-                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kInvalidDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+              Eq(ref_doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest,
        HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
+  // Create and put referenced (parent) document. Get its document id and
+  // namespace id.
   DocumentProto referenced_document =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "ref_type/1")
           .SetSchema(std::string(kReferencedType))
           .AddStringProperty(std::string(kPropertyName), "one")
           .Build();
-
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+                             doc_store_->Put(referenced_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id,
+      doc_store_->GetNamespaceId(referenced_document.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+      /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+              IsOkAndHolds(ref_doc_id));
+
+  // Create and put (child) document. Also tokenize it.
   DocumentProto document =
       DocumentBuilder()
           .SetKey("icing", "fake_type/1")
@@ -406,57 +642,75 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
           .AddStringProperty(std::string(kPropertyQualifiedId),
                              "pkg$db/ns#ref_type/1")
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
-                                document));
-
-  qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
-  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
+                                std::move(document)));
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
 
-  // Handling document with document_id < last_added_document_id should cause a
-  // failure, and both index data and last_added_document_id should remain
-  // unchanged.
-  ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+  // Handling document with document_id == last_added_document_id should cause a
+  // failure.
+  qualified_id_join_index_->set_last_added_document_id(doc_id);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId - 1,
-                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  // Verify the state of qualified_id_join_index_ after Handle(). Both index
+  // data and last_added_document_id should remain unchanged.
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 
-  // Handling document with document_id == last_added_document_id should cause a
-  // failure, and both index data and last_added_document_id should remain
-  // unchanged.
+  // Handling document with document_id < last_added_document_id should cause a
+  // failure.
+  qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(doc_id + 1));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId,
-                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
+                      /*put_document_stats=*/nullptr),
       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+  // Verify the state of qualified_id_join_index_ after Handle(). Both index
+  // data and last_added_document_id should remain unchanged.
   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+              Eq(doc_id + 1));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 }
 
 TEST_F(QualifiedIdJoinIndexingHandlerTest,
-       HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
+       HandleRecoveryModeShouldIndexDocsGtLastAddedDocId) {
+  // Create and put referenced (parent) document. Get its document id and
+  // namespace id.
   DocumentProto referenced_document =
       DocumentBuilder()
           .SetKey("pkg$db/ns", "ref_type/1")
           .SetSchema(std::string(kReferencedType))
           .AddStringProperty(std::string(kPropertyName), "one")
           .Build();
-
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+                             doc_store_->Put(referenced_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id,
+      doc_store_->GetNamespaceId(referenced_document.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+      /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+              IsOkAndHolds(ref_doc_id));
+
+  // Create and put (child) document. Also tokenize it.
   DocumentProto document =
       DocumentBuilder()
           .SetKey("icing", "fake_type/1")
@@ -464,60 +718,109 @@ TEST_F(QualifiedIdJoinIndexingHandlerTest,
           .AddStringProperty(std::string(kPropertyQualifiedId),
                              "pkg$db/ns#ref_type/1")
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
   ICING_ASSERT_OK_AND_ASSIGN(
       TokenizedDocument tokenized_document,
       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
-                                document));
-
-  qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId);
-  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
+                                std::move(document)));
 
   ICING_ASSERT_OK_AND_ASSIGN(
       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
-      QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                              qualified_id_join_index_.get()));
 
-  // Handle document with document_id < last_added_document_id in recovery mode.
-  // We should not get any error, but the handler should ignore the document, so
-  // both index data and last_added_document_id should remain unchanged.
-  ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue());
+  // Handle document with document_id > last_added_document_id in recovery mode.
+  // The handler should index this document and update last_added_document_id.
+  qualified_id_join_index_->set_last_added_document_id(doc_id - 1);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(doc_id - 1));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId - 1,
-                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+                      /*put_document_stats=*/nullptr),
       IsOk());
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(
+          ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
+              /*document_id=*/doc_id,
+              /*join_info=*/ref_doc_ns_fingerprint_id))));
+}
+
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
+       HandleRecoveryModeShouldIgnoreDocsLeLastAddedDocId) {
+  // Create and put referenced (parent) document. Get its document id and
+  // namespace id.
+  DocumentProto referenced_document =
+      DocumentBuilder()
+          .SetKey("pkg$db/ns", "ref_type/1")
+          .SetSchema(std::string(kReferencedType))
+          .AddStringProperty(std::string(kPropertyName), "one")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
+                             doc_store_->Put(referenced_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      NamespaceId ref_doc_ns_id,
+      doc_store_->GetNamespaceId(referenced_document.namespace_()));
+  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
+      /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
+  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
+              IsOkAndHolds(ref_doc_id));
+
+  // Create and put (child) document. Also tokenize it.
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/1")
+          .SetSchema(std::string(kFakeType))
+          .AddStringProperty(std::string(kPropertyQualifiedId),
+                             "pkg$db/ns#ref_type/1")
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                std::move(document)));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
+                                             qualified_id_join_index_.get()));
 
   // Handle document with document_id == last_added_document_id in recovery
   // mode. We should not get any error, but the handler should ignore the
   // document, so both index data and last_added_document_id should remain
   // unchanged.
+  qualified_id_join_index_->set_last_added_document_id(doc_id);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId,
-                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+                      /*put_document_stats=*/nullptr),
       IsOk());
-  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId, kQualifiedIdJoinablePropertyId)),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 
-  // Handle document with document_id > last_added_document_id in recovery mode.
-  // The handler should index this document and update last_added_document_id.
-  ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue());
+  // Handle document with document_id < last_added_document_id in recovery mode.
+  // We should not get any error, but the handler should ignore the document, so
+  // both index data and last_added_document_id should remain unchanged.
+  qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
+  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
+              Eq(doc_id + 1));
   EXPECT_THAT(
-      handler->Handle(tokenized_document, kDefaultDocumentId + 1,
-                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
+      handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
+                      /*put_document_stats=*/nullptr),
       IsOk());
   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
-              Eq(kDefaultDocumentId + 1));
-  EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo(
-                  kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)),
-              IsOkAndHolds("pkg$db/ns#ref_type/1"));
+              Eq(doc_id + 1));
+  // (kFakeType, kPropertyQualifiedId) should contain nothing.
+  EXPECT_THAT(
+      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
+                  /*joinable_property_id=*/fake_type_joinable_property_id_),
+      IsOkAndHolds(IsEmpty()));
 }
 
 }  // namespace